2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_write_set_format_pax.c,v 1.39 2007/03/03 07:37:36 kientzle Exp $");
29 #ifdef HAVE_SYS_STAT_H
33 #include <sys/mkdev.h>
35 #ifdef MAJOR_IN_SYSMACROS
36 #include <sys/sysmacros.h>
53 #include "archive_entry.h"
54 #include "archive_private.h"
55 #include "archive_write_private.h"
58 uint64_t entry_bytes_remaining
;
59 uint64_t entry_padding
;
60 struct archive_string pax_header
;
63 static void add_pax_attr(struct archive_string
*, const char *key
,
65 static void add_pax_attr_int(struct archive_string
*,
66 const char *key
, int64_t value
);
67 static void add_pax_attr_time(struct archive_string
*,
68 const char *key
, int64_t sec
,
70 static void add_pax_attr_w(struct archive_string
*,
71 const char *key
, const wchar_t *wvalue
);
72 static ssize_t
archive_write_pax_data(struct archive_write
*,
73 const void *, size_t);
74 static int archive_write_pax_finish(struct archive_write
*);
75 static int archive_write_pax_destroy(struct archive_write
*);
76 static int archive_write_pax_finish_entry(struct archive_write
*);
77 static int archive_write_pax_header(struct archive_write
*,
78 struct archive_entry
*);
79 static char *base64_encode(const char *src
, size_t len
);
80 static char *build_pax_attribute_name(char *dest
, const char *src
);
81 static char *build_ustar_entry_name(char *dest
, const char *src
,
82 size_t src_length
, const char *insert
);
83 static char *format_int(char *dest
, int64_t);
84 static int has_non_ASCII(const wchar_t *);
85 static char *url_encode(const char *in
);
86 static int write_nulls(struct archive_write
*, size_t);
89 * Set output format to 'restricted pax' format.
91 * This is the same as normal 'pax', but tries to suppress
92 * the pax header whenever possible. This is the default for
93 * bsdtar, for instance.
96 archive_write_set_format_pax_restricted(struct archive
*_a
)
98 struct archive_write
*a
= (struct archive_write
*)_a
;
100 r
= archive_write_set_format_pax(&a
->archive
);
101 a
->archive_format
= ARCHIVE_FORMAT_TAR_PAX_RESTRICTED
;
102 a
->archive_format_name
= "restricted POSIX pax interchange";
107 * Set output format to 'pax' format.
110 archive_write_set_format_pax(struct archive
*_a
)
112 struct archive_write
*a
= (struct archive_write
*)_a
;
115 if (a
->format_destroy
!= NULL
)
116 (a
->format_destroy
)(a
);
118 pax
= (struct pax
*)malloc(sizeof(*pax
));
120 archive_set_error(&a
->archive
, ENOMEM
, "Can't allocate pax data");
121 return (ARCHIVE_FATAL
);
123 memset(pax
, 0, sizeof(*pax
));
124 a
->format_data
= pax
;
126 a
->pad_uncompressed
= 1;
127 a
->format_write_header
= archive_write_pax_header
;
128 a
->format_write_data
= archive_write_pax_data
;
129 a
->format_finish
= archive_write_pax_finish
;
130 a
->format_destroy
= archive_write_pax_destroy
;
131 a
->format_finish_entry
= archive_write_pax_finish_entry
;
132 a
->archive_format
= ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE
;
133 a
->archive_format_name
= "POSIX pax interchange";
138 * Note: This code assumes that 'nanos' has the same sign as 'sec',
139 * which implies that sec=-1, nanos=200000000 represents -1.2 seconds
140 * and not -0.8 seconds. This is a pretty pedantic point, as we're
141 * unlikely to encounter many real files created before Jan 1, 1970,
142 * much less ones with timestamps recorded to sub-second resolution.
/*
 * Append a "<key>=<sec>[.<fraction>]" time attribute to 'as'.
 *
 * The text is assembled right-to-left in a local buffer: first the
 * fractional digits taken from 'nanos' (with trailing zeros dropped,
 * and omitted entirely when 'nanos' is zero), then a '.', then the
 * seconds as formatted by format_int().  The finished string is handed
 * to add_pax_attr().
 */
static void
add_pax_attr_time(struct archive_string *as, const char *key,
    int64_t sec, unsigned long nanos)
{
	/*
	 * Each byte contributes fewer than 3 base-10 digits, so this
	 * is always large enough.
	 */
	char tmp[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)];
	char *cursor = tmp + sizeof(tmp) - 1;
	int remaining = 10;
	int digit = 0;

	*cursor = 0;	/* Terminator; digits are written in front of it. */

	/* Consume low-order digits until a non-zero one turns up. */
	while (remaining > 0 && digit == 0) {
		digit = nanos % 10;
		nanos /= 10;
		remaining--;
	}

	/* 'remaining' is zero here only when no non-zero digit was found. */
	if (remaining > 0) {
		for (; remaining > 0; remaining--) {
			*--cursor = "0123456789"[digit];
			digit = nanos % 10;
			nanos /= 10;
		}
		*--cursor = '.';
	}

	add_pax_attr(as, key, format_int(cursor, sec));
}
/*
 * Format 'i' in base 10, writing the digits backwards so that the
 * last digit lands at t[-1].
 *
 * 't' must point just past the end of the space available for the
 * number (typically at the terminating NUL of a work buffer).
 * Returns a pointer to the first character written (the '-' sign for
 * negative values).
 *
 * The magnitude is computed in unsigned arithmetic, so INT64_MIN is
 * handled without negating a signed value (which would overflow).
 */
static char *
format_int(char *t, int64_t i)
{
	uint64_t magnitude;

	if (i < 0)
		magnitude = (uint64_t)0 - (uint64_t)i;
	else
		magnitude = (uint64_t)i;

	/* do/while so that zero still produces a single "0". */
	do {
		*--t = "0123456789"[magnitude % 10];
		magnitude /= 10;
	} while (magnitude != 0);

	if (i < 0)
		*--t = '-';
	return (t);
}
/*
 * Append an integer-valued attribute to 'as': 'value' is rendered in
 * base 10 by format_int() and passed on to add_pax_attr().
 */
static void
add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
{
	char tmp[1 + 3 * sizeof(value)];
	char *const end = tmp + sizeof(tmp) - 1;
	char *text;

	*end = 0;	/* format_int() writes backwards from here. */
	text = format_int(end, value);
	add_pax_attr(as, key, text);
}
/*
 * Convert a NUL-terminated wide-character string to UTF-8.
 *
 * Works in two passes: the first adds up the number of output bytes,
 * the second writes them.  Values up to 0x7fffffff are encoded using
 * one to six bytes; larger values are silently skipped in both passes.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free().  If the allocation fails, __archive_errx() is called and
 * NULL is returned.
 */
static char *
utf8_encode(const wchar_t *wval)
{
	int utf8len;
	const wchar_t *wp;
	unsigned long wc;
	char *utf8_value, *p;

	/* Pass 1: compute the encoded length. */
	utf8len = 0;
	for (wp = wval; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f)
			utf8len++;
		else if (wc <= 0x7ff)
			utf8len += 2;
		else if (wc <= 0xffff)
			utf8len += 3;
		else if (wc <= 0x1fffff)
			utf8len += 4;
		else if (wc <= 0x3ffffff)
			utf8len += 5;
		else if (wc <= 0x7fffffff)
			utf8len += 6;
		/* Ignore larger values; UTF-8 can't encode them. */
	}

	utf8_value = (char *)malloc(utf8len + 1);
	if (utf8_value == NULL) {
		__archive_errx(1, "Not enough memory for attributes");
		return (NULL);
	}

	/* Pass 2: emit the bytes; must mirror the ranges used above. */
	for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f) {
			*p++ = (char)wc;
		} else if (wc <= 0x7ff) {
			p[0] = (char)(0xc0 | ((wc >> 6) & 0x1f));
			p[1] = (char)(0x80 | (wc & 0x3f));
			p += 2;
		} else if (wc <= 0xffff) {
			p[0] = (char)(0xe0 | ((wc >> 12) & 0x0f));
			p[1] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[2] = (char)(0x80 | (wc & 0x3f));
			p += 3;
		} else if (wc <= 0x1fffff) {
			p[0] = (char)(0xf0 | ((wc >> 18) & 0x07));
			p[1] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[3] = (char)(0x80 | (wc & 0x3f));
			p += 4;
		} else if (wc <= 0x3ffffff) {
			p[0] = (char)(0xf8 | ((wc >> 24) & 0x03));
			p[1] = (char)(0x80 | ((wc >> 18) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[3] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[4] = (char)(0x80 | (wc & 0x3f));
			p += 5;
		} else if (wc <= 0x7fffffff) {
			/*
			 * Six bytes: one lead byte plus five continuation
			 * bytes, stored at p[0]..p[5].
			 */
			p[0] = (char)(0xfc | ((wc >> 30) & 0x01));
			p[1] = (char)(0x80 | ((wc >> 24) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 18) & 0x3f));
			p[3] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[4] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[5] = (char)(0x80 | (wc & 0x3f));
			p += 6;
		}
		/* Ignore larger values; UTF-8 can't encode them. */
	}
	*p = '\0';

	return (utf8_value);
}
/*
 * Append an attribute whose value is a wide-character string.  The
 * value is converted with utf8_encode(), passed to add_pax_attr(),
 * and the temporary UTF-8 copy is released.  Nothing is appended when
 * the conversion returns NULL.
 */
static void
add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
{
	char *encoded;

	encoded = utf8_encode(wval);
	if (encoded != NULL) {
		add_pax_attr(as, key, encoded);
		free(encoded);
	}
}
293 * Add a key/value attribute to the pax header. This function handles
294 * the length field and various other syntactic requirements.
/*
 * Append one attribute record to 'as' in the layout
 *
 *     <len> <space> <key> <=> <value> <nl>
 *
 * where <len> is the decimal length of the whole record, including
 * the digits of <len> itself.
 */
static void
add_pax_attr(struct archive_string *as, const char *key, const char *value)
{
	char lenbuf[1 + 3 * sizeof(int)];	/* < 3 base-10 digits per byte */
	int payload, width, threshold, scratch;

	/* Everything except the length digits: ' ' key '=' value '\n'. */
	payload = 1 + strlen(key) + 1 + strlen(value) + 1;

	/*
	 * Count the decimal digits of 'payload' and, alongside, compute
	 * the smallest power of ten that needs one more digit.
	 */
	width = 0;
	threshold = 1;
	for (scratch = payload; scratch > 0; scratch /= 10) {
		width++;
		threshold *= 10;
	}

	/*
	 * Adding the digits themselves can push the total up to the next
	 * power of ten (e.g. 99 + 2 digits), which costs one more digit.
	 */
	if (payload + width >= threshold)
		width++;

	/* Emit the record. */
	lenbuf[sizeof(lenbuf) - 1] = 0;
	archive_strcat(as,
	    format_int(lenbuf + sizeof(lenbuf) - 1, payload + width));
	archive_strappend_char(as, ' ');
	archive_strcat(as, key);
	archive_strappend_char(as, '=');
	archive_strcat(as, value);
	archive_strappend_char(as, '\n');
}
342 archive_write_pax_header_xattrs(struct pax
*pax
, struct archive_entry
*entry
)
344 struct archive_string s
;
345 int i
= archive_entry_xattr_reset(entry
);
351 char *url_encoded_name
= NULL
, *encoded_name
= NULL
;
352 wchar_t *wcs_name
= NULL
;
355 archive_entry_xattr_next(entry
, &name
, &value
, &size
);
356 /* Name is URL-encoded, then converted to wchar_t,
357 * then UTF-8 encoded. */
358 url_encoded_name
= url_encode(name
);
359 if (url_encoded_name
!= NULL
) {
360 /* Convert narrow-character to wide-character. */
361 int wcs_length
= strlen(url_encoded_name
);
362 wcs_name
= (wchar_t *)malloc((wcs_length
+ 1) * sizeof(wchar_t));
363 if (wcs_name
== NULL
)
364 __archive_errx(1, "No memory for xattr conversion");
365 mbstowcs(wcs_name
, url_encoded_name
, wcs_length
);
366 wcs_name
[wcs_length
] = 0;
367 free(url_encoded_name
); /* Done with this. */
369 if (wcs_name
!= NULL
) {
370 encoded_name
= utf8_encode(wcs_name
);
371 free(wcs_name
); /* Done with wchar_t name. */
374 encoded_value
= base64_encode((const char *)value
, size
);
376 if (encoded_name
!= NULL
&& encoded_value
!= NULL
) {
377 archive_string_init(&s
);
378 archive_strcpy(&s
, "LIBARCHIVE.xattr.");
379 archive_strcat(&s
, encoded_name
);
380 add_pax_attr(&(pax
->pax_header
), s
.s
, encoded_value
);
381 archive_string_free(&s
);
389 * TODO: Consider adding 'comment' and 'charset' fields to
390 * archive_entry so that clients can specify them. Also, consider
391 * adding generic key/value tags so clients can add arbitrary
395 archive_write_pax_header(struct archive_write
*a
,
396 struct archive_entry
*entry_original
)
398 struct archive_entry
*entry_main
;
399 const char *linkname
, *p
;
400 const char *hardlink
;
402 const char *suffix_start
;
403 int need_extension
, r
, ret
;
405 const struct stat
*st_main
, *st_original
;
409 char ustar_entry_name
[256];
410 char pax_entry_name
[256];
413 pax
= (struct pax
*)a
->format_data
;
415 st_original
= archive_entry_stat(entry_original
);
417 hardlink
= archive_entry_hardlink(entry_original
);
419 /* Make sure this is a type of entry that we can handle here */
420 if (hardlink
== NULL
) {
421 switch (st_original
->st_mode
& S_IFMT
) {
430 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
431 "tar format cannot archive socket");
432 return (ARCHIVE_WARN
);
434 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
435 "tar format cannot archive this (mode=0%lo)",
436 (unsigned long)st_original
->st_mode
);
437 return (ARCHIVE_WARN
);
441 /* Copy entry so we can modify it as needed. */
442 entry_main
= archive_entry_clone(entry_original
);
443 archive_string_empty(&(pax
->pax_header
)); /* Blank our work area. */
444 st_main
= archive_entry_stat(entry_main
);
447 * Determining whether or not the name is too big is ugly
448 * because of the rules for dividing names between 'name' and
449 * 'prefix' fields. Here, I pick out the longest possible
450 * suffix, then test whether the remaining prefix is too long.
452 wp
= archive_entry_pathname_w(entry_main
);
453 p
= archive_entry_pathname(entry_main
);
454 if (strlen(p
) <= 100) /* Short enough for just 'name' field */
455 suffix_start
= p
; /* Record a zero-length prefix */
457 /* Find the largest suffix that fits in 'name' field. */
458 suffix_start
= strchr(p
+ strlen(p
) - 100 - 1, '/');
461 * If name is too long, or has non-ASCII characters, add
462 * 'path' to pax extended attrs.
464 if (suffix_start
== NULL
|| suffix_start
- p
> 155 || has_non_ASCII(wp
)) {
465 add_pax_attr_w(&(pax
->pax_header
), "path", wp
);
466 archive_entry_set_pathname(entry_main
,
467 build_ustar_entry_name(ustar_entry_name
, p
, strlen(p
), NULL
));
471 /* If link name is too long or has non-ASCII characters, add
472 * 'linkpath' to pax extended attrs. */
474 if (linkname
== NULL
)
475 linkname
= archive_entry_symlink(entry_main
);
477 if (linkname
!= NULL
) {
478 /* There is a link name, get the wide version as well. */
479 if (hardlink
!= NULL
)
480 wp
= archive_entry_hardlink_w(entry_main
);
482 wp
= archive_entry_symlink_w(entry_main
);
484 /* If the link is long or has a non-ASCII character,
485 * store it as a pax extended attribute. */
486 if (strlen(linkname
) > 100 || has_non_ASCII(wp
)) {
487 add_pax_attr_w(&(pax
->pax_header
), "linkpath", wp
);
488 if (hardlink
!= NULL
)
489 archive_entry_set_hardlink(entry_main
,
490 "././@LongHardLink");
492 archive_entry_set_symlink(entry_main
,
498 /* If file size is too large, add 'size' to pax extended attrs. */
499 if (st_main
->st_size
>= (((int64_t)1) << 33)) {
500 add_pax_attr_int(&(pax
->pax_header
), "size", st_main
->st_size
);
504 /* If numeric GID is too large, add 'gid' to pax extended attrs. */
505 if (st_main
->st_gid
>= (1 << 18)) {
506 add_pax_attr_int(&(pax
->pax_header
), "gid", st_main
->st_gid
);
510 /* If group name is too large or has non-ASCII characters, add
511 * 'gname' to pax extended attrs. */
512 p
= archive_entry_gname(entry_main
);
513 wp
= archive_entry_gname_w(entry_main
);
514 if (p
!= NULL
&& (strlen(p
) > 31 || has_non_ASCII(wp
))) {
515 add_pax_attr_w(&(pax
->pax_header
), "gname", wp
);
516 archive_entry_set_gname(entry_main
, NULL
);
520 /* If numeric UID is too large, add 'uid' to pax extended attrs. */
521 if (st_main
->st_uid
>= (1 << 18)) {
522 add_pax_attr_int(&(pax
->pax_header
), "uid", st_main
->st_uid
);
526 /* If user name is too large, add 'uname' to pax extended attrs. */
527 /* TODO: If uname has non-ASCII characters, use pax attribute. */
528 p
= archive_entry_uname(entry_main
);
529 wp
= archive_entry_uname_w(entry_main
);
530 if (p
!= NULL
&& (strlen(p
) > 31 || has_non_ASCII(wp
))) {
531 add_pax_attr_w(&(pax
->pax_header
), "uname", wp
);
532 archive_entry_set_uname(entry_main
, NULL
);
537 * POSIX/SUSv3 doesn't provide a standard key for large device
538 * numbers. I use the same keys here that Joerg Schilling
539 * used for 'star.' (Which, somewhat confusingly, are called
540 * "devXXX" even though they code "rdev" values.) No doubt,
541 * other implementations use other keys. Note that there's no
542 * reason we can't write the same information into a number of
545 * Of course, this is only needed for block or char device entries.
547 if (S_ISBLK(st_main
->st_mode
) ||
548 S_ISCHR(st_main
->st_mode
)) {
550 * If rdevmajor is too large, add 'SCHILY.devmajor' to
551 * extended attributes.
553 dev_t rdevmajor
, rdevminor
;
554 rdevmajor
= major(st_main
->st_rdev
);
555 rdevminor
= minor(st_main
->st_rdev
);
556 if (rdevmajor
>= (1 << 18)) {
557 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.devmajor",
560 * Non-strict formatting below means we don't
561 * have to truncate here. Not truncating improves
562 * the chance that some more modern tar archivers
563 * (such as GNU tar 1.13) can restore the full
564 * value even if they don't understand the pax
565 * extended attributes. See my rant below about
566 * file size fields for additional details.
568 /* archive_entry_set_rdevmajor(entry_main,
569 rdevmajor & ((1 << 18) - 1)); */
574 * If devminor is too large, add 'SCHILY.devminor' to
575 * extended attributes.
577 if (rdevminor
>= (1 << 18)) {
578 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.devminor",
580 /* Truncation is not necessary here, either. */
581 /* archive_entry_set_rdevminor(entry_main,
582 rdevminor & ((1 << 18) - 1)); */
588 * Technically, the mtime field in the ustar header can
589 * support 33 bits, but many platforms use signed 32-bit time
590 * values. The cutoff of 0x7fffffff here is a compromise.
591 * Yes, this check is duplicated just below; this helps to
592 * avoid writing an mtime attribute just to handle a
593 * high-resolution timestamp in "restricted pax" mode.
595 if (!need_extension
&&
596 ((st_main
->st_mtime
< 0) || (st_main
->st_mtime
>= 0x7fffffff)))
599 /* I use a star-compatible file flag attribute. */
600 p
= archive_entry_fflags_text(entry_main
);
601 if (!need_extension
&& p
!= NULL
&& *p
!= '\0')
604 /* If there are non-trivial ACL entries, we need an extension. */
605 if (!need_extension
&& archive_entry_acl_count(entry_original
,
606 ARCHIVE_ENTRY_ACL_TYPE_ACCESS
) > 0)
609 /* If there are non-trivial ACL entries, we need an extension. */
610 if (!need_extension
&& archive_entry_acl_count(entry_original
,
611 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT
) > 0)
614 /* If there are extended attributes, we need an extension */
615 if (!need_extension
&& archive_entry_xattr_count(entry_original
) > 0)
619 * The following items are handled differently in "pax
620 * restricted" format. In particular, in "pax restricted"
621 * format they won't be added unless need_extension is
622 * already set (we're already generating an extended header, so
623 * may as well include these).
625 if (a
->archive_format
!= ARCHIVE_FORMAT_TAR_PAX_RESTRICTED
||
628 if (st_main
->st_mtime
< 0 ||
629 st_main
->st_mtime
>= 0x7fffffff ||
630 ARCHIVE_STAT_MTIME_NANOS(st_main
) != 0)
631 add_pax_attr_time(&(pax
->pax_header
), "mtime",
633 ARCHIVE_STAT_MTIME_NANOS(st_main
));
635 if (st_main
->st_ctime
!= 0 ||
636 ARCHIVE_STAT_CTIME_NANOS(st_main
) != 0)
637 add_pax_attr_time(&(pax
->pax_header
), "ctime",
639 ARCHIVE_STAT_CTIME_NANOS(st_main
));
641 if (st_main
->st_atime
!= 0 ||
642 ARCHIVE_STAT_ATIME_NANOS(st_main
) != 0)
643 add_pax_attr_time(&(pax
->pax_header
), "atime",
645 ARCHIVE_STAT_ATIME_NANOS(st_main
));
647 /* I use a star-compatible file flag attribute. */
648 p
= archive_entry_fflags_text(entry_main
);
649 if (p
!= NULL
&& *p
!= '\0')
650 add_pax_attr(&(pax
->pax_header
), "SCHILY.fflags", p
);
652 /* I use star-compatible ACL attributes. */
653 wp
= archive_entry_acl_text_w(entry_original
,
654 ARCHIVE_ENTRY_ACL_TYPE_ACCESS
|
655 ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID
);
656 if (wp
!= NULL
&& *wp
!= L
'\0')
657 add_pax_attr_w(&(pax
->pax_header
),
658 "SCHILY.acl.access", wp
);
659 wp
= archive_entry_acl_text_w(entry_original
,
660 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT
|
661 ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID
);
662 if (wp
!= NULL
&& *wp
!= L
'\0')
663 add_pax_attr_w(&(pax
->pax_header
),
664 "SCHILY.acl.default", wp
);
666 /* Include star-compatible metadata info. */
667 /* Note: "SCHILY.dev{major,minor}" are NOT the
668 * major/minor portions of "SCHILY.dev". */
669 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.dev",
671 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.ino",
673 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.nlink",
676 /* Store extended attributes */
677 archive_write_pax_header_xattrs(pax
, entry_original
);
680 /* Only regular files have data. */
681 if (!S_ISREG(archive_entry_mode(entry_main
)))
682 archive_entry_set_size(entry_main
, 0);
685 * Pax-restricted does not store data for hardlinks, in order
686 * to improve compatibility with ustar.
688 if (a
->archive_format
!= ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE
&&
690 archive_entry_set_size(entry_main
, 0);
693 * XXX Full pax interchange format does permit a hardlink
694 * entry to have data associated with it. I'm not supporting
695 * that here because the client expects me to tell them whether
696 * or not this format expects data for hardlinks. If I
697 * don't check here, then every pax archive will end up with
698 * duplicated data for hardlinks. Someday, there may be
699 * need to select this behavior, in which case the following
700 * will need to be revisited. XXX
702 if (hardlink
!= NULL
)
703 archive_entry_set_size(entry_main
, 0);
705 /* Format 'ustar' header for main entry.
707 * The trouble with file size: If the reader can't understand
708 * the file size, they may not be able to locate the next
709 * entry and the rest of the archive is toast. Pax-compliant
710 * readers are supposed to ignore the file size in the main
711 * header, so the question becomes how to maximize portability
712 * for readers that don't support pax attribute extensions.
713 * For maximum compatibility, I permit numeric extensions in
714 * the main header so that the file size stored will always be
715 * correct, even if it's in a format that only some
716 * implementations understand. The technique used here is:
718 * a) If possible, follow the standard exactly. This handles
719 * files up to 8 gigabytes minus 1.
721 * b) If that fails, try octal but omit the field terminator.
722 * That handles files up to 64 gigabytes minus 1.
724 * c) Otherwise, use base-256 extensions. That handles files
725 * up to 2^63 in this implementation, with the potential to
726 * go up to 2^94. That should hold us for a while. ;-)
728 * The non-strict formatter uses similar logic for other
729 * numeric fields, though they're less critical.
731 __archive_write_format_header_ustar(a
, ustarbuff
, entry_main
, -1, 0);
733 /* If we built any extended attributes, write that entry first. */
735 if (archive_strlen(&(pax
->pax_header
)) > 0) {
737 struct archive_entry
*pax_attr_entry
;
741 memset(&st
, 0, sizeof(st
));
742 pax_attr_entry
= archive_entry_new();
743 p
= archive_entry_pathname(entry_main
);
744 archive_entry_set_pathname(pax_attr_entry
,
745 build_pax_attribute_name(pax_entry_name
, p
));
746 st
.st_size
= archive_strlen(&(pax
->pax_header
));
747 /* Copy uid/gid (but clip to ustar limits). */
748 st
.st_uid
= st_main
->st_uid
;
749 if (st
.st_uid
>= 1 << 18)
750 st
.st_uid
= (1 << 18) - 1;
751 st
.st_gid
= st_main
->st_gid
;
752 if (st
.st_gid
>= 1 << 18)
753 st
.st_gid
= (1 << 18) - 1;
754 /* Copy mode over (but not setuid/setgid bits) */
755 st
.st_mode
= st_main
->st_mode
;
757 st
.st_mode
&= ~S_ISUID
;
760 st
.st_mode
&= ~S_ISGID
;
763 st
.st_mode
&= ~S_ISVTX
;
765 archive_entry_copy_stat(pax_attr_entry
, &st
);
767 /* Copy uname/gname. */
768 archive_entry_set_uname(pax_attr_entry
,
769 archive_entry_uname(entry_main
));
770 archive_entry_set_gname(pax_attr_entry
,
771 archive_entry_gname(entry_main
));
773 /* Copy mtime, but clip to ustar limits. */
774 s
= archive_entry_mtime(entry_main
);
775 ns
= archive_entry_mtime_nsec(entry_main
);
776 if (s
< 0) { s
= 0; ns
= 0; }
777 if (s
> 0x7fffffff) { s
= 0x7fffffff; ns
= 0; }
778 archive_entry_set_mtime(pax_attr_entry
, s
, ns
);
780 /* Ditto for atime. */
781 s
= archive_entry_atime(entry_main
);
782 ns
= archive_entry_atime_nsec(entry_main
);
783 if (s
< 0) { s
= 0; ns
= 0; }
784 if (s
> 0x7fffffff) { s
= 0x7fffffff; ns
= 0; }
785 archive_entry_set_atime(pax_attr_entry
, s
, ns
);
787 /* Standard ustar doesn't support ctime. */
788 archive_entry_set_ctime(pax_attr_entry
, 0, 0);
790 ret
= __archive_write_format_header_ustar(a
, paxbuff
,
791 pax_attr_entry
, 'x', 1);
793 archive_entry_free(pax_attr_entry
);
795 /* Note that the 'x' header shouldn't ever fail to format */
797 const char *msg
= "archive_write_pax_header: "
798 "'x' header failed?! This can't happen.\n";
799 write(2, msg
, strlen(msg
));
802 r
= (a
->compression_write
)(a
, paxbuff
, 512);
803 if (r
!= ARCHIVE_OK
) {
804 pax
->entry_bytes_remaining
= 0;
805 pax
->entry_padding
= 0;
806 return (ARCHIVE_FATAL
);
809 pax
->entry_bytes_remaining
= archive_strlen(&(pax
->pax_header
));
810 pax
->entry_padding
= 0x1ff & (-(int64_t)pax
->entry_bytes_remaining
);
812 r
= (a
->compression_write
)(a
, pax
->pax_header
.s
,
813 archive_strlen(&(pax
->pax_header
)));
814 if (r
!= ARCHIVE_OK
) {
815 /* If a write fails, we're pretty much toast. */
816 return (ARCHIVE_FATAL
);
818 /* Pad out the end of the entry. */
819 r
= write_nulls(a
, pax
->entry_padding
);
820 if (r
!= ARCHIVE_OK
) {
821 /* If a write fails, we're pretty much toast. */
822 return (ARCHIVE_FATAL
);
824 pax
->entry_bytes_remaining
= pax
->entry_padding
= 0;
827 /* Write the header for main entry. */
828 r
= (a
->compression_write
)(a
, ustarbuff
, 512);
833 * Inform the client of the on-disk size we're using, so
834 * they can avoid unnecessarily writing a body for something
835 * that we're just going to ignore.
837 archive_entry_set_size(entry_original
, archive_entry_size(entry_main
));
838 pax
->entry_bytes_remaining
= archive_entry_size(entry_main
);
839 pax
->entry_padding
= 0x1ff & (-(int64_t)pax
->entry_bytes_remaining
);
840 archive_entry_free(entry_main
);
846 * We need a valid name for the regular 'ustar' entry. This routine
847 * tries to hack something more-or-less reasonable.
849 * The approach here tries to preserve leading dir names. We do so by
850 * working with four sections:
851 * 1) "prefix" directory names,
852 * 2) "suffix" directory names,
853 * 3) inserted dir name (optional),
856 * These sections must satisfy the following requirements:
857 * * Parts 1 & 2 together form an initial portion of the dir name.
858 * * Part 3 is specified by the caller. (It should not contain a leading
860 * * Part 4 forms an initial portion of the base filename.
861 * * The filename must be <= 99 chars to fit the ustar 'name' field.
862 * * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld.
863 * * Part 1 must be <= 155 chars to fit the ustar 'prefix' field.
864 * * If the original name ends in a '/', the new name must also end in a '/'
865 * * Trailing '/.' sequences may be stripped.
867 * Note: Recall that the ustar format does not store the '/' separating
868 * parts 1 & 2, but does store the '/' separating parts 2 & 3.
/*
 * Build a name that fits the ustar 'prefix' (155) and 'name' (99)
 * limits from the first 'src_length' bytes of 'src', optionally
 * inserting the directory element 'insert' in front of the filename.
 *
 * The result is assembled in 'dest' from, in order: a leading run of
 * directory names (at most 155 bytes), a following run of directory
 * names, "<insert>/" when 'insert' is not NULL, the (possibly
 * truncated) filename, and a trailing '/' if trailing '/' or '/.'
 * elements were stripped from the input.  'dest' is returned.
 *
 * 'insert' must not contain a leading or trailing '/'.
 *
 * A name that consists only of '/' and '/.' elements leaves an empty
 * filename; that case is handled explicitly so that nothing in front
 * of 'src' is ever examined.  Limits are enforced by comparing
 * lengths rather than by forming pointers beyond the input.
 */
static char *
build_ustar_entry_name(char *dest, const char *src, size_t src_length,
    const char *insert)
{
	const char *prefix, *prefix_end;
	const char *suffix, *suffix_end;
	const char *filename, *filename_end;
	char *p;
	int need_slash = 0; /* Was there a trailing slash? */
	size_t suffix_length = 99;
	size_t insert_length;

	/* Length of additional dir element to be added. */
	if (insert == NULL)
		insert_length = 0;
	else
		/* +2 here allows for '/' before and after the insert. */
		insert_length = strlen(insert) + 2;

	/* Step 0: Quick bailout in a common case. */
	if (src_length < 100 && insert == NULL) {
		strncpy(dest, src, src_length);
		dest[src_length] = '\0';
		return (dest);
	}

	/* Step 1: Locate filename and enforce the length restriction. */
	filename_end = src + src_length;
	/* Remove trailing '/' chars and '/.' pairs. */
	for (;;) {
		if (filename_end > src && filename_end[-1] == '/') {
			filename_end--;
			need_slash = 1; /* Remember to restore trailing '/'. */
			continue;
		}
		if (filename_end > src + 1 && filename_end[-1] == '.'
		    && filename_end[-2] == '/') {
			filename_end -= 2;
			need_slash = 1; /* "foo/." will become "foo/" */
			continue;
		}
		break;
	}
	if (need_slash)
		suffix_length--;

	/* Find start of filename. */
	if (filename_end == src) {
		/* Nothing left after stripping: the filename is empty. */
		filename = src;
	} else {
		filename = filename_end - 1;
		while ((filename > src) && (*filename != '/'))
			filename--;
		if ((*filename == '/') && (filename < filename_end - 1))
			filename++;
	}

	/* Adjust filename_end so that filename + insert fits in 99 chars. */
	suffix_length -= insert_length;
	if ((size_t)(filename_end - filename) > suffix_length)
		filename_end = filename + suffix_length;
	/* Calculate max size for the "suffix" directory section. */
	suffix_length -= filename_end - filename;

	/* Step 2: Locate the "prefix" section of the dirname, including
	 * trailing '/'. */
	prefix = src;
	if ((size_t)(filename - prefix) > 155)
		prefix_end = prefix + 155;
	else
		prefix_end = filename;
	while (prefix_end > prefix && *prefix_end != '/')
		prefix_end--;
	if ((prefix_end < filename) && (*prefix_end == '/'))
		prefix_end++;

	/* Step 3: Locate the "suffix" section of the dirname,
	 * including trailing '/'. */
	suffix = prefix_end;
	if ((size_t)(filename - suffix) > suffix_length)
		suffix_end = suffix + suffix_length; /* Enforce limit. */
	else
		suffix_end = filename;
	while (suffix_end > suffix && *suffix_end != '/')
		suffix_end--;
	if ((suffix_end < filename) && (*suffix_end == '/'))
		suffix_end++;

	/* Step 4: Build the new name. */
	p = dest;
	if (prefix_end > prefix) {
		strncpy(p, prefix, prefix_end - prefix);
		p += prefix_end - prefix;
	}
	if (suffix_end > suffix) {
		strncpy(p, suffix, suffix_end - suffix);
		p += suffix_end - suffix;
	}
	if (insert != NULL) {
		/* Note: assume insert does not have leading or trailing '/' */
		strcpy(p, insert);
		p += strlen(insert);
		*p++ = '/';
	}
	strncpy(p, filename, filename_end - filename);
	p += filename_end - filename;
	if (need_slash)
		*p++ = '/';
	*p++ = '\0';

	return (dest);
}
980 * The ustar header for the pax extended attributes must have a
981 * reasonable name: SUSv3 suggests 'dirname'/PaxHeader/'filename'
983 * Joerg Schilling has argued that this is unnecessary because, in practice,
984 * if the pax extended attributes get extracted as regular files, no one is
985 * going to bother reading those attributes to manually restore them.
986 * Based on this, 'star' uses /tmp/PaxHeader/'basename' as the ustar header
987 * name. This is a tempting argument, but I'm not entirely convinced.
988 * I'm also uncomfortable with the fact that "/tmp" is a Unix-ism.
990 * The following routine implements the SUSv3 recommendation, and is
991 * much simpler because build_ustar_entry_name() above already does
992 * most of the work (we just need to give it an extra path element to
993 * insert and handle a few pathological cases).
/*
 * Build the ustar name used for the pax extended-attribute entry that
 * accompanies 'src', writing it to 'dest' and returning 'dest'.
 *
 * Special cases: a NULL or empty name yields "PaxHeader/blank"; a
 * name with nothing left after trailing '/' and '/.' elements are
 * removed yields "/PaxHeader/rootdir"; a name reduced to a bare "."
 * yields "PaxHeader/currentdir".  Everything else is delegated to
 * build_ustar_entry_name() with "PaxHeader" as the inserted element.
 */
static char *
build_pax_attribute_name(char *dest, const char *src)
{
	const char *end;
	int stripped;

	/* Handle the null filename case. */
	if (src == NULL || *src == '\0') {
		strcpy(dest, "PaxHeader/blank");
		return (dest);
	}

	/* Prune final '/' and the '.' of a final "/.". */
	end = src + strlen(src);
	do {
		stripped = 0;
		if (end > src && end[-1] == '/') {
			/* Ends in "/", remove the '/' */
			end--;
			stripped = 1;
		} else if (end > src + 1 && end[-1] == '.' && end[-2] == '/') {
			/* Ends in "/.", remove the '.' */
			end--;
			stripped = 1;
		}
	} while (stripped);

	/* Pathological case: nothing left ("/." "/./." "/.//./." etc.) */
	if (end == src) {
		strcpy(dest, "/PaxHeader/rootdir");
		return (dest);
	}

	/* Convert unadorned "." into a suitable filename. */
	if (end == src + 1 && *src == '.') {
		strcpy(dest, "PaxHeader/currentdir");
		return (dest);
	}

	/* General case: build a ustar-compatible name adding "/PaxHeader/". */
	build_ustar_entry_name(dest, src, end - src, "PaxHeader");
	return (dest);
}
1042 /* Write two null blocks for the end of archive */
1044 archive_write_pax_finish(struct archive_write
*a
)
1049 if (a
->compression_write
== NULL
)
1050 return (ARCHIVE_OK
);
1052 pax
= (struct pax
*)a
->format_data
;
1053 r
= write_nulls(a
, 512 * 2);
1058 archive_write_pax_destroy(struct archive_write
*a
)
1062 pax
= (struct pax
*)a
->format_data
;
1063 archive_string_free(&pax
->pax_header
);
1065 a
->format_data
= NULL
;
1066 return (ARCHIVE_OK
);
1070 archive_write_pax_finish_entry(struct archive_write
*a
)
1075 pax
= (struct pax
*)a
->format_data
;
1076 ret
= write_nulls(a
, pax
->entry_bytes_remaining
+ pax
->entry_padding
);
1077 pax
->entry_bytes_remaining
= pax
->entry_padding
= 0;
1082 write_nulls(struct archive_write
*a
, size_t padding
)
1086 while (padding
> 0) {
1087 to_write
= padding
< a
->null_length
? padding
: a
->null_length
;
1088 ret
= (a
->compression_write
)(a
, a
->nulls
, to_write
);
1089 if (ret
!= ARCHIVE_OK
)
1091 padding
-= to_write
;
1093 return (ARCHIVE_OK
);
1097 archive_write_pax_data(struct archive_write
*a
, const void *buff
, size_t s
)
1102 pax
= (struct pax
*)a
->format_data
;
1103 if (s
> pax
->entry_bytes_remaining
)
1104 s
= pax
->entry_bytes_remaining
;
1106 ret
= (a
->compression_write
)(a
, buff
, s
);
1107 pax
->entry_bytes_remaining
-= s
;
1108 if (ret
== ARCHIVE_OK
)
/*
 * Return 1 if the NUL-terminated wide string 'wp' contains any
 * character outside the range 1..127, and 0 otherwise.
 *
 * The lower bound is tested explicitly so that, where wchar_t is a
 * signed type, negative values are reported as non-ASCII as well.
 */
static int
has_non_ASCII(const wchar_t *wp)
{
	for (; *wp != L'\0'; wp++) {
		if (*wp < 0 || *wp > 127)
			return (1);
	}
	return (0);
}
1123 * Used by extended attribute support; encodes the name
1124 * so that there will be no '=' characters in the result.
/*
 * Return a malloc()ed copy of 'in' in which every byte that is below
 * 33, above 126, or equal to '%' or '=' is replaced by '%' followed
 * by two upper-case hex digits.  Returns NULL if the allocation
 * fails.  The caller frees the result.
 */
static char *
url_encode(const char *in)
{
	static const char hex[] = "0123456789ABCDEF";
	const char *src;
	char *result, *dst;
	int needed = 0;

	/* Size the output: three bytes per escaped input byte, else one. */
	for (src = in; *src != '\0'; src++) {
		if (*src >= 33 && *src <= 126 && *src != '%' && *src != '=')
			needed += 1;
		else
			needed += 3;
	}

	result = (char *)malloc(needed + 1);
	if (result == NULL)
		return (NULL);

	dst = result;
	for (src = in; *src != '\0'; src++) {
		if (*src >= 33 && *src <= 126 && *src != '%' && *src != '=') {
			*dst++ = *src;
			continue;
		}
		/* URL encoding is '%' followed by two hex digits */
		*dst++ = '%';
		*dst++ = hex[0x0f & (*src >> 4)];
		*dst++ = hex[0x0f & *src];
	}
	*dst = '\0';
	return (result);
}
1161 * Encode a sequence of bytes into a C string using base-64 encoding.
1163 * Returns a null-terminated C string allocated with malloc(); caller
1164 * is responsible for freeing the result.
1167 base64_encode(const char *s
, size_t len
)
1169 static const char digits
[64] =
1170 { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
1171 'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d',
1172 'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s',
1173 't','u','v','w','x','y','z','0','1','2','3','4','5','6','7',
1178 /* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
1179 out
= (char *)malloc((len
* 4 + 2) / 3 + 1);
1184 /* Convert each group of 3 bytes into 4 characters. */
1186 v
= (((int)s
[0] << 16) & 0xff0000)
1187 | (((int)s
[1] << 8) & 0xff00)
1188 | (((int)s
[2]) & 0x00ff);
1191 *d
++ = digits
[(v
>> 18) & 0x3f];
1192 *d
++ = digits
[(v
>> 12) & 0x3f];
1193 *d
++ = digits
[(v
>> 6) & 0x3f];
1194 *d
++ = digits
[(v
) & 0x3f];
1196 /* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
1200 v
= (((int)s
[0] << 16) & 0xff0000);
1201 *d
++ = digits
[(v
>> 18) & 0x3f];
1202 *d
++ = digits
[(v
>> 12) & 0x3f];
1205 v
= (((int)s
[0] << 16) & 0xff0000)
1206 | (((int)s
[1] << 8) & 0xff00);
1207 *d
++ = digits
[(v
>> 18) & 0x3f];
1208 *d
++ = digits
[(v
>> 12) & 0x3f];
1209 *d
++ = digits
[(v
>> 6) & 0x3f];
1212 /* Add trailing NUL character so output is a valid C string. */