2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_write_set_format_pax.c,v 1.47 2008/05/26 17:00:23 kientzle Exp $");
40 #include "archive_entry.h"
41 #include "archive_private.h"
42 #include "archive_write_private.h"
45 uint64_t entry_bytes_remaining
;
46 uint64_t entry_padding
;
47 struct archive_string pax_header
;
/* Forward declarations of the file-local helpers. */
static void	 add_pax_attr(struct archive_string *as, const char *key,
		     const char *value);
static void	 add_pax_attr_int(struct archive_string *as,
		     const char *key, int64_t value);
static void	 add_pax_attr_time(struct archive_string *as,
		     const char *key, int64_t sec,
		     unsigned long nanos);
static void	 add_pax_attr_w(struct archive_string *as,
		     const char *key, const wchar_t *wvalue);
static ssize_t	 archive_write_pax_data(struct archive_write *a,
		     const void *buff, size_t length);
static int	 archive_write_pax_finish(struct archive_write *a);
static int	 archive_write_pax_destroy(struct archive_write *a);
static int	 archive_write_pax_finish_entry(struct archive_write *a);
static int	 archive_write_pax_header(struct archive_write *a,
		     struct archive_entry *entry);
static char	*base64_encode(const char *src, size_t len);
static char	*build_pax_attribute_name(char *dest, const char *src);
static char	*build_ustar_entry_name(char *dest, const char *src,
		     size_t src_length, const char *insert);
static char	*format_int(char *dest, int64_t value);
static int	 has_non_ASCII(const wchar_t *wstr);
static char	*url_encode(const char *in);
static int	 write_nulls(struct archive_write *a, size_t count);
76 * Set output format to 'restricted pax' format.
78 * This is the same as normal 'pax', but tries to suppress
79 * the pax header whenever possible. This is the default for
80 * bsdtar, for instance.
83 archive_write_set_format_pax_restricted(struct archive
*_a
)
85 struct archive_write
*a
= (struct archive_write
*)_a
;
87 r
= archive_write_set_format_pax(&a
->archive
);
88 a
->archive
.archive_format
= ARCHIVE_FORMAT_TAR_PAX_RESTRICTED
;
89 a
->archive
.archive_format_name
= "restricted POSIX pax interchange";
94 * Set output format to 'pax' format.
97 archive_write_set_format_pax(struct archive
*_a
)
99 struct archive_write
*a
= (struct archive_write
*)_a
;
102 if (a
->format_destroy
!= NULL
)
103 (a
->format_destroy
)(a
);
105 pax
= (struct pax
*)malloc(sizeof(*pax
));
107 archive_set_error(&a
->archive
, ENOMEM
, "Can't allocate pax data");
108 return (ARCHIVE_FATAL
);
110 memset(pax
, 0, sizeof(*pax
));
111 a
->format_data
= pax
;
113 a
->pad_uncompressed
= 1;
114 a
->format_write_header
= archive_write_pax_header
;
115 a
->format_write_data
= archive_write_pax_data
;
116 a
->format_finish
= archive_write_pax_finish
;
117 a
->format_destroy
= archive_write_pax_destroy
;
118 a
->format_finish_entry
= archive_write_pax_finish_entry
;
119 a
->archive
.archive_format
= ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE
;
120 a
->archive
.archive_format_name
= "POSIX pax interchange";
/*
 * Append a timestamp attribute, formatted as "<sec>[.<fraction>]", to
 * the pax header.  Trailing zeros of the fraction are dropped and the
 * fraction is omitted entirely when 'nanos' is zero.
 *
 * Note: 'nanos' is taken to have the same sign as 'sec', so sec=-1,
 * nanos=200000000 is written as -1.2 seconds rather than -0.8 seconds.
 * This is a pedantic point: real files dated before Jan 1, 1970 are
 * rare, and ones with sub-second timestamps rarer still.
 */
static void
add_pax_attr_time(struct archive_string *as, const char *key,
    int64_t sec, unsigned long nanos)
{
	/*
	 * Every byte contributes fewer than 3 base-10 digits, so this
	 * is always large enough for sign, seconds, '.', and fraction.
	 */
	char buf[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)];
	char *out = buf + sizeof(buf) - 1;
	int remaining = 10;
	int d = 0;

	/* The text is built right to left, starting at the terminator. */
	*out = '\0';

	/* Peel off low-order digits until a non-zero one shows up. */
	while (remaining > 0 && d == 0) {
		d = nanos % 10;
		nanos /= 10;
		remaining--;
	}

	/* Emit the fraction only if something non-zero was found. */
	if (remaining > 0) {
		for (; remaining > 0; remaining--) {
			*--out = "0123456789"[d];
			d = nanos % 10;
			nanos /= 10;
		}
		*--out = '.';
	}

	add_pax_attr(as, key, format_int(out, sec));
}
/*
 * Format 'i' as signed base-10 text, working backwards.
 *
 * 't' must point just past the place where the last digit should go
 * (typically at the NUL that terminates a scratch buffer); digits are
 * stored at decreasing addresses and a leading '-' is added for
 * negative values.  The caller must provide at least 20 writable bytes
 * before 't'.
 *
 * Returns a pointer to the first character written.
 */
static char *
format_int(char *t, int64_t i)
{
	uint64_t magnitude;

	/*
	 * Take the absolute value in unsigned arithmetic: negating
	 * INT64_MIN as an int64_t would overflow.
	 */
	if (i < 0)
		magnitude = (uint64_t)0 - (uint64_t)i;
	else
		magnitude = (uint64_t)i;

	do {
		*--t = "0123456789"[magnitude % 10];
		magnitude /= 10;
	} while (magnitude != 0);

	if (i < 0)
		*--t = '-';
	return (t);
}
/*
 * Append an integer-valued attribute to the pax header by formatting
 * 'value' in base 10 and handing it to add_pax_attr().
 */
static void
add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
{
	/* Fewer than 3 decimal digits per byte, plus the terminator. */
	char scratch[1 + 3 * sizeof(value)];
	char *const end = scratch + sizeof(scratch) - 1;

	*end = 0;
	add_pax_attr(as, key, format_int(end, value));
}
/*
 * Convert a NUL-terminated wide-character string to UTF-8.
 *
 * Code points up to 0x7fffffff are encoded using the 1- to 6-byte
 * forms; larger values cannot be represented and are silently skipped.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free.  Allocation failure is reported through __archive_errx(); NULL
 * is returned should that call ever come back.
 */
static char *
utf8_encode(const wchar_t *wval)
{
	size_t utf8len;
	const wchar_t *wp;
	unsigned long wc;
	char *utf8_value, *p;

	/* Pass 1: work out how many bytes the encoding needs. */
	utf8len = 0;
	for (wp = wval; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f)
			utf8len++;
		else if (wc <= 0x7ff)
			utf8len += 2;
		else if (wc <= 0xffff)
			utf8len += 3;
		else if (wc <= 0x1fffff)
			utf8len += 4;
		else if (wc <= 0x3ffffff)
			utf8len += 5;
		else if (wc <= 0x7fffffff)
			utf8len += 6;
		/* Ignore larger values; UTF-8 can't encode them. */
	}

	utf8_value = (char *)malloc(utf8len + 1);
	if (utf8_value == NULL) {
		__archive_errx(1, "Not enough memory for attributes");
		return (NULL);
	}

	/* Pass 2: emit the bytes; branches mirror pass 1 exactly. */
	for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f) {
			*p++ = (char)wc;
		} else if (wc <= 0x7ff) {
			p[0] = (char)(0xc0 | ((wc >> 6) & 0x1f));
			p[1] = (char)(0x80 | (wc & 0x3f));
			p += 2;
		} else if (wc <= 0xffff) {
			p[0] = (char)(0xe0 | ((wc >> 12) & 0x0f));
			p[1] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[2] = (char)(0x80 | (wc & 0x3f));
			p += 3;
		} else if (wc <= 0x1fffff) {
			p[0] = (char)(0xf0 | ((wc >> 18) & 0x07));
			p[1] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[3] = (char)(0x80 | (wc & 0x3f));
			p += 4;
		} else if (wc <= 0x3ffffff) {
			p[0] = (char)(0xf8 | ((wc >> 24) & 0x03));
			p[1] = (char)(0x80 | ((wc >> 18) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[3] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[4] = (char)(0x80 | (wc & 0x3f));
			p += 5;
		} else if (wc <= 0x7fffffff) {
			/*
			 * Six bytes: one lead byte plus five continuation
			 * bytes, each carrying six payload bits.
			 */
			p[0] = (char)(0xfc | ((wc >> 30) & 0x01));
			p[1] = (char)(0x80 | ((wc >> 24) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 18) & 0x3f));
			p[3] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[4] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[5] = (char)(0x80 | (wc & 0x3f));
			p += 6;
		}
		/* Ignore larger values; UTF-8 can't encode them. */
	}
	*p = '\0';

	return (utf8_value);
}
/*
 * Append an attribute whose value is a wide-character string: the
 * value is converted to UTF-8 and then stored with add_pax_attr().
 * Nothing is added if the conversion yields no string.
 */
static void
add_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval)
{
	char *encoded;

	encoded = utf8_encode(wval);
	if (encoded != NULL) {
		add_pax_attr(as, key, encoded);
		free(encoded);	/* The UTF-8 copy was only a temporary. */
	}
}
/* Number of base-10 digits needed to print 'n' (0 for n == 0). */
static int
pax_decimal_width(size_t n)
{
	int width = 0;

	for (; n > 0; n /= 10)
		width++;
	return (width);
}

/*
 * Add a key/value attribute to the pax header.  This function handles
 * the length field and various other syntactic requirements.
 *
 * 'key' and 'value' must be NUL-terminated strings; the finished record
 * is appended to 'as'.
 */
static void
add_pax_attr(struct archive_string *as, const char *key, const char *value)
{
	size_t len;	/* Record length without the <len> field. */
	int digits;	/* Width of the <len> field. */
	char tmp[1 + 3 * sizeof(int64_t)]; /* < 3 base-10 digits per byte */

	/*-
	 * PAX attributes have the following layout:
	 *     <len> <space> <key> <=> <value> <nl>
	 */
	len = 1 + strlen(key) + 1 + strlen(value) + 1;

	/*
	 * The <len> field includes the length of the <len> field, so
	 * computing the correct length is tricky.  Start with the
	 * number of digits in 'len' itself.
	 */
	digits = pax_decimal_width(len);

	/*
	 * For example, if string without the length field is 99
	 * chars, then adding the 2 digit length "99" will force the
	 * total length past 100, requiring an extra digit.  Detect
	 * that by re-measuring the total.
	 */
	if (pax_decimal_width(len + digits) > digits)
		digits++;

	/* Now, we have the right length so we can build the line. */
	tmp[sizeof(tmp) - 1] = 0;	/* Null-terminate the work area. */
	archive_strcat(as,
	    format_int(tmp + sizeof(tmp) - 1, (int64_t)(len + digits)));
	archive_strappend_char(as, ' ');
	archive_strcat(as, key);
	archive_strappend_char(as, '=');
	archive_strcat(as, value);
	archive_strappend_char(as, '\n');
}
329 archive_write_pax_header_xattrs(struct pax
*pax
, struct archive_entry
*entry
)
331 struct archive_string s
;
332 int i
= archive_entry_xattr_reset(entry
);
338 char *url_encoded_name
= NULL
, *encoded_name
= NULL
;
339 wchar_t *wcs_name
= NULL
;
342 archive_entry_xattr_next(entry
, &name
, &value
, &size
);
343 /* Name is URL-encoded, then converted to wchar_t,
344 * then UTF-8 encoded. */
345 url_encoded_name
= url_encode(name
);
346 if (url_encoded_name
!= NULL
) {
347 /* Convert narrow-character to wide-character. */
348 int wcs_length
= strlen(url_encoded_name
);
349 wcs_name
= (wchar_t *)malloc((wcs_length
+ 1) * sizeof(wchar_t));
350 if (wcs_name
== NULL
)
351 __archive_errx(1, "No memory for xattr conversion");
352 mbstowcs(wcs_name
, url_encoded_name
, wcs_length
);
353 wcs_name
[wcs_length
] = 0;
354 free(url_encoded_name
); /* Done with this. */
356 if (wcs_name
!= NULL
) {
357 encoded_name
= utf8_encode(wcs_name
);
358 free(wcs_name
); /* Done with wchar_t name. */
361 encoded_value
= base64_encode((const char *)value
, size
);
363 if (encoded_name
!= NULL
&& encoded_value
!= NULL
) {
364 archive_string_init(&s
);
365 archive_strcpy(&s
, "LIBARCHIVE.xattr.");
366 archive_strcat(&s
, encoded_name
);
367 add_pax_attr(&(pax
->pax_header
), s
.s
, encoded_value
);
368 archive_string_free(&s
);
376 * TODO: Consider adding 'comment' and 'charset' fields to
377 * archive_entry so that clients can specify them. Also, consider
378 * adding generic key/value tags so clients can add arbitrary
382 archive_write_pax_header(struct archive_write
*a
,
383 struct archive_entry
*entry_original
)
385 struct archive_entry
*entry_main
;
390 int need_extension
, r
, ret
;
392 const char *hdrcharset
= NULL
;
393 const char *hardlink
;
394 const char *path
= NULL
, *linkpath
= NULL
;
395 const char *uname
= NULL
, *gname
= NULL
;
396 const wchar_t *path_w
= NULL
, *linkpath_w
= NULL
;
397 const wchar_t *uname_w
= NULL
, *gname_w
= NULL
;
401 char ustar_entry_name
[256];
402 char pax_entry_name
[256];
406 pax
= (struct pax
*)a
->format_data
;
408 hardlink
= archive_entry_hardlink(entry_original
);
410 /* Make sure this is a type of entry that we can handle here */
411 if (hardlink
== NULL
) {
412 switch (archive_entry_filetype(entry_original
)) {
421 * Ensure a trailing '/'. Modify the original
422 * entry so the client sees the change.
424 p
= archive_entry_pathname(entry_original
);
425 if (p
[strlen(p
) - 1] != '/') {
426 t
= (char *)malloc(strlen(p
) + 2);
428 archive_set_error(&a
->archive
, ENOMEM
,
429 "Can't allocate pax data");
430 return(ARCHIVE_FATAL
);
434 archive_entry_copy_pathname(entry_original
, t
);
439 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
440 "tar format cannot archive this (type=0%lo)",
441 (unsigned long)archive_entry_filetype(entry_original
));
442 return (ARCHIVE_WARN
);
446 /* Copy entry so we can modify it as needed. */
447 entry_main
= archive_entry_clone(entry_original
);
448 archive_string_empty(&(pax
->pax_header
)); /* Blank our work area. */
451 * First, check the name fields and see if any of them
452 * require binary coding. If any of them does, then all of
456 path
= archive_entry_pathname(entry_main
);
457 path_w
= archive_entry_pathname_w(entry_main
);
458 if (path
!= NULL
&& path_w
== NULL
) {
459 archive_set_error(&a
->archive
, EILSEQ
,
460 "Can't translate pathname '%s' to UTF-8", path
);
462 hdrcharset
= "BINARY";
464 uname
= archive_entry_uname(entry_main
);
465 uname_w
= archive_entry_uname_w(entry_main
);
466 if (uname
!= NULL
&& uname_w
== NULL
) {
467 archive_set_error(&a
->archive
, EILSEQ
,
468 "Can't translate uname '%s' to UTF-8", uname
);
470 hdrcharset
= "BINARY";
472 gname
= archive_entry_gname(entry_main
);
473 gname_w
= archive_entry_gname_w(entry_main
);
474 if (gname
!= NULL
&& gname_w
== NULL
) {
475 archive_set_error(&a
->archive
, EILSEQ
,
476 "Can't translate gname '%s' to UTF-8", gname
);
478 hdrcharset
= "BINARY";
481 if (linkpath
!= NULL
) {
482 linkpath_w
= archive_entry_hardlink_w(entry_main
);
484 linkpath
= archive_entry_symlink(entry_main
);
485 if (linkpath
!= NULL
)
486 linkpath_w
= archive_entry_symlink_w(entry_main
);
488 if (linkpath
!= NULL
&& linkpath_w
== NULL
) {
489 archive_set_error(&a
->archive
, EILSEQ
,
490 "Can't translate linkpath '%s' to UTF-8", linkpath
);
492 hdrcharset
= "BINARY";
495 /* Store the header encoding first, to be nice to readers. */
496 if (hdrcharset
!= NULL
)
497 add_pax_attr(&(pax
->pax_header
), "hdrcharset", hdrcharset
);
501 * If name is too long, or has non-ASCII characters, add
502 * 'path' to pax extended attrs. (Note that an unconvertible
503 * name must have non-ASCII characters.)
506 /* We don't have a narrow version, so we have to store
507 * the wide version. */
508 add_pax_attr_w(&(pax
->pax_header
), "path", path_w
);
509 archive_entry_set_pathname(entry_main
, "@WidePath");
511 } else if (has_non_ASCII(path_w
)) {
512 /* We have non-ASCII characters. */
513 if (path_w
== NULL
|| hdrcharset
!= NULL
) {
514 /* Can't do UTF-8, so store it raw. */
515 add_pax_attr(&(pax
->pax_header
), "path", path
);
518 add_pax_attr_w(&(pax
->pax_header
),
521 archive_entry_set_pathname(entry_main
,
522 build_ustar_entry_name(ustar_entry_name
,
523 path
, strlen(path
), NULL
));
526 /* We have an all-ASCII path; we'd like to just store
527 * it in the ustar header if it will fit. Yes, this
528 * duplicates some of the logic in
529 * write_set_format_ustar.c
531 if (strlen(path
) <= 100) {
532 /* Fits in the old 100-char tar name field. */
534 /* Find largest suffix that will fit. */
535 /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */
536 suffix
= strchr(path
+ strlen(path
) - 100 - 1, '/');
537 /* Don't attempt an empty prefix. */
539 suffix
= strchr(suffix
+ 1, '/');
540 /* We can put it in the ustar header if it's
541 * all ASCII and it's either <= 100 characters
542 * or can be split at a '/' into a prefix <=
543 * 155 chars and a suffix <= 100 chars. (Note
544 * the strchr() above will return NULL exactly
545 * when the path can't be split.)
547 if (suffix
== NULL
/* Suffix > 100 chars. */
548 || suffix
[1] == '\0' /* empty suffix */
549 || suffix
- path
> 155) /* Prefix > 155 chars */
551 if (path_w
== NULL
|| hdrcharset
!= NULL
) {
552 /* Can't do UTF-8, so store it raw. */
553 add_pax_attr(&(pax
->pax_header
),
557 add_pax_attr_w(&(pax
->pax_header
),
560 archive_entry_set_pathname(entry_main
,
561 build_ustar_entry_name(ustar_entry_name
,
562 path
, strlen(path
), NULL
));
568 if (linkpath
!= NULL
) {
569 /* If link name is too long or has non-ASCII characters, add
570 * 'linkpath' to pax extended attrs. */
571 if (strlen(linkpath
) > 100 || linkpath_w
== NULL
572 || linkpath_w
== NULL
|| has_non_ASCII(linkpath_w
)) {
573 if (linkpath_w
== NULL
|| hdrcharset
!= NULL
)
574 /* If the linkpath is not convertible
575 * to wide, or we're encoding in
576 * binary anyway, store it raw. */
577 add_pax_attr(&(pax
->pax_header
),
578 "linkpath", linkpath
);
580 /* If the link is long or has a
581 * non-ASCII character, store it as a
582 * pax extended attribute. */
583 add_pax_attr_w(&(pax
->pax_header
),
584 "linkpath", linkpath_w
);
585 if (strlen(linkpath
) > 100) {
586 if (hardlink
!= NULL
)
587 archive_entry_set_hardlink(entry_main
,
588 "././@LongHardLink");
590 archive_entry_set_symlink(entry_main
,
597 /* If file size is too large, add 'size' to pax extended attrs. */
598 if (archive_entry_size(entry_main
) >= (((int64_t)1) << 33)) {
599 add_pax_attr_int(&(pax
->pax_header
), "size",
600 archive_entry_size(entry_main
));
604 /* If numeric GID is too large, add 'gid' to pax extended attrs. */
605 if (archive_entry_gid(entry_main
) >= (1 << 18)) {
606 add_pax_attr_int(&(pax
->pax_header
), "gid",
607 archive_entry_gid(entry_main
));
611 /* If group name is too large or has non-ASCII characters, add
612 * 'gname' to pax extended attrs. */
614 if (strlen(gname
) > 31
616 || has_non_ASCII(gname_w
))
618 if (gname_w
== NULL
|| hdrcharset
!= NULL
) {
619 add_pax_attr(&(pax
->pax_header
),
622 add_pax_attr_w(&(pax
->pax_header
),
629 /* If numeric UID is too large, add 'uid' to pax extended attrs. */
630 if (archive_entry_uid(entry_main
) >= (1 << 18)) {
631 add_pax_attr_int(&(pax
->pax_header
), "uid",
632 archive_entry_uid(entry_main
));
636 /* Add 'uname' to pax extended attrs if necessary. */
638 if (strlen(uname
) > 31
640 || has_non_ASCII(uname_w
))
642 if (uname_w
== NULL
|| hdrcharset
!= NULL
) {
643 add_pax_attr(&(pax
->pax_header
),
646 add_pax_attr_w(&(pax
->pax_header
),
654 * POSIX/SUSv3 doesn't provide a standard key for large device
655 * numbers. I use the same keys here that Joerg Schilling
656 * used for 'star.' (Which, somewhat confusingly, are called
657 * "devXXX" even though they code "rdev" values.) No doubt,
658 * other implementations use other keys. Note that there's no
659 * reason we can't write the same information into a number of
662 * Of course, this is only needed for block or char device entries.
664 if (archive_entry_filetype(entry_main
) == AE_IFBLK
665 || archive_entry_filetype(entry_main
) == AE_IFCHR
) {
667 * If rdevmajor is too large, add 'SCHILY.devmajor' to
668 * extended attributes.
670 dev_t rdevmajor
, rdevminor
;
671 rdevmajor
= archive_entry_rdevmajor(entry_main
);
672 rdevminor
= archive_entry_rdevminor(entry_main
);
673 if (rdevmajor
>= (1 << 18)) {
674 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.devmajor",
677 * Non-strict formatting below means we don't
678 * have to truncate here. Not truncating improves
679 * the chance that some more modern tar archivers
680 * (such as GNU tar 1.13) can restore the full
681 * value even if they don't understand the pax
682 * extended attributes. See my rant below about
683 * file size fields for additional details.
685 /* archive_entry_set_rdevmajor(entry_main,
686 rdevmajor & ((1 << 18) - 1)); */
691 * If devminor is too large, add 'SCHILY.devminor' to
692 * extended attributes.
694 if (rdevminor
>= (1 << 18)) {
695 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.devminor",
697 /* Truncation is not necessary here, either. */
698 /* archive_entry_set_rdevminor(entry_main,
699 rdevminor & ((1 << 18) - 1)); */
705 * Technically, the mtime field in the ustar header can
706 * support 33 bits, but many platforms use signed 32-bit time
707 * values. The cutoff of 0x7fffffff here is a compromise.
708 * Yes, this check is duplicated just below; this helps to
709 * avoid writing an mtime attribute just to handle a
710 * high-resolution timestamp in "restricted pax" mode.
712 if (!need_extension
&&
713 ((archive_entry_mtime(entry_main
) < 0)
714 || (archive_entry_mtime(entry_main
) >= 0x7fffffff)))
717 /* I use a star-compatible file flag attribute. */
718 p
= archive_entry_fflags_text(entry_main
);
719 if (!need_extension
&& p
!= NULL
&& *p
!= '\0')
722 /* If there are non-trivial ACL entries, we need an extension. */
723 if (!need_extension
&& archive_entry_acl_count(entry_original
,
724 ARCHIVE_ENTRY_ACL_TYPE_ACCESS
) > 0)
727 /* If there are non-trivial ACL entries, we need an extension. */
728 if (!need_extension
&& archive_entry_acl_count(entry_original
,
729 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT
) > 0)
732 /* If there are extended attributes, we need an extension */
733 if (!need_extension
&& archive_entry_xattr_count(entry_original
) > 0)
737 * The following items are handled differently in "pax
738 * restricted" format. In particular, in "pax restricted"
739 * format they won't be added unless need_extension is
740 * already set (we're already generating an extended header, so
741 * may as well include these).
743 if (a
->archive
.archive_format
!= ARCHIVE_FORMAT_TAR_PAX_RESTRICTED
||
746 if (archive_entry_mtime(entry_main
) < 0 ||
747 archive_entry_mtime(entry_main
) >= 0x7fffffff ||
748 archive_entry_mtime_nsec(entry_main
) != 0)
749 add_pax_attr_time(&(pax
->pax_header
), "mtime",
750 archive_entry_mtime(entry_main
),
751 archive_entry_mtime_nsec(entry_main
));
753 if (archive_entry_ctime(entry_main
) != 0 ||
754 archive_entry_ctime_nsec(entry_main
) != 0)
755 add_pax_attr_time(&(pax
->pax_header
), "ctime",
756 archive_entry_ctime(entry_main
),
757 archive_entry_ctime_nsec(entry_main
));
759 if (archive_entry_atime(entry_main
) != 0 ||
760 archive_entry_atime_nsec(entry_main
) != 0)
761 add_pax_attr_time(&(pax
->pax_header
), "atime",
762 archive_entry_atime(entry_main
),
763 archive_entry_atime_nsec(entry_main
));
765 /* I use a star-compatible file flag attribute. */
766 p
= archive_entry_fflags_text(entry_main
);
767 if (p
!= NULL
&& *p
!= '\0')
768 add_pax_attr(&(pax
->pax_header
), "SCHILY.fflags", p
);
770 /* I use star-compatible ACL attributes. */
771 wp
= archive_entry_acl_text_w(entry_original
,
772 ARCHIVE_ENTRY_ACL_TYPE_ACCESS
|
773 ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID
);
774 if (wp
!= NULL
&& *wp
!= L
'\0')
775 add_pax_attr_w(&(pax
->pax_header
),
776 "SCHILY.acl.access", wp
);
777 wp
= archive_entry_acl_text_w(entry_original
,
778 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT
|
779 ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID
);
780 if (wp
!= NULL
&& *wp
!= L
'\0')
781 add_pax_attr_w(&(pax
->pax_header
),
782 "SCHILY.acl.default", wp
);
784 /* Include star-compatible metadata info. */
785 /* Note: "SCHILY.dev{major,minor}" are NOT the
786 * major/minor portions of "SCHILY.dev". */
787 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.dev",
788 archive_entry_dev(entry_main
));
789 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.ino",
790 archive_entry_ino(entry_main
));
791 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.nlink",
792 archive_entry_nlink(entry_main
));
794 /* Store extended attributes */
795 archive_write_pax_header_xattrs(pax
, entry_original
);
798 /* Only regular files have data. */
799 if (archive_entry_filetype(entry_main
) != AE_IFREG
)
800 archive_entry_set_size(entry_main
, 0);
803 * Pax-restricted does not store data for hardlinks, in order
804 * to improve compatibility with ustar.
806 if (a
->archive
.archive_format
!= ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE
&&
808 archive_entry_set_size(entry_main
, 0);
811 * XXX Full pax interchange format does permit a hardlink
812 * entry to have data associated with it. I'm not supporting
813 * that here because the client expects me to tell them whether
814 * or not this format expects data for hardlinks. If I
815 * don't check here, then every pax archive will end up with
816 * duplicated data for hardlinks. Someday, there may be
817 * need to select this behavior, in which case the following
818 * will need to be revisited. XXX
820 if (hardlink
!= NULL
)
821 archive_entry_set_size(entry_main
, 0);
823 /* Format 'ustar' header for main entry.
825 * The trouble with file size: If the reader can't understand
826 * the file size, they may not be able to locate the next
827 * entry and the rest of the archive is toast. Pax-compliant
828 * readers are supposed to ignore the file size in the main
829 * header, so the question becomes how to maximize portability
830 * for readers that don't support pax attribute extensions.
831 * For maximum compatibility, I permit numeric extensions in
832 * the main header so that the file size stored will always be
833 * correct, even if it's in a format that only some
834 * implementations understand. The technique used here is:
836 * a) If possible, follow the standard exactly. This handles
837 * files up to 8 gigabytes minus 1.
839 * b) If that fails, try octal but omit the field terminator.
840 * That handles files up to 64 gigabytes minus 1.
842 * c) Otherwise, use base-256 extensions. That handles files
843 * up to 2^63 in this implementation, with the potential to
844 * go up to 2^94. That should hold us for a while. ;-)
846 * The non-strict formatter uses similar logic for other
847 * numeric fields, though they're less critical.
849 __archive_write_format_header_ustar(a
, ustarbuff
, entry_main
, -1, 0);
851 /* If we built any extended attributes, write that entry first. */
852 if (archive_strlen(&(pax
->pax_header
)) > 0) {
853 struct archive_entry
*pax_attr_entry
;
860 pax_attr_entry
= archive_entry_new();
861 p
= archive_entry_pathname(entry_main
);
862 archive_entry_set_pathname(pax_attr_entry
,
863 build_pax_attribute_name(pax_entry_name
, p
));
864 archive_entry_set_size(pax_attr_entry
,
865 archive_strlen(&(pax
->pax_header
)));
866 /* Copy uid/gid (but clip to ustar limits). */
867 uid
= archive_entry_uid(entry_main
);
870 archive_entry_set_uid(pax_attr_entry
, uid
);
871 gid
= archive_entry_gid(entry_main
);
874 archive_entry_set_gid(pax_attr_entry
, gid
);
875 /* Copy mode over (but not setuid/setgid bits) */
876 mode
= archive_entry_mode(entry_main
);
886 archive_entry_set_mode(pax_attr_entry
, mode
);
888 /* Copy uname/gname. */
889 archive_entry_set_uname(pax_attr_entry
,
890 archive_entry_uname(entry_main
));
891 archive_entry_set_gname(pax_attr_entry
,
892 archive_entry_gname(entry_main
));
894 /* Copy mtime, but clip to ustar limits. */
895 s
= archive_entry_mtime(entry_main
);
896 ns
= archive_entry_mtime_nsec(entry_main
);
897 if (s
< 0) { s
= 0; ns
= 0; }
898 if (s
> 0x7fffffff) { s
= 0x7fffffff; ns
= 0; }
899 archive_entry_set_mtime(pax_attr_entry
, s
, ns
);
901 /* Ditto for atime. */
902 s
= archive_entry_atime(entry_main
);
903 ns
= archive_entry_atime_nsec(entry_main
);
904 if (s
< 0) { s
= 0; ns
= 0; }
905 if (s
> 0x7fffffff) { s
= 0x7fffffff; ns
= 0; }
906 archive_entry_set_atime(pax_attr_entry
, s
, ns
);
908 /* Standard ustar doesn't support ctime. */
909 archive_entry_set_ctime(pax_attr_entry
, 0, 0);
911 r
= __archive_write_format_header_ustar(a
, paxbuff
,
912 pax_attr_entry
, 'x', 1);
914 archive_entry_free(pax_attr_entry
);
916 /* Note that the 'x' header shouldn't ever fail to format */
918 const char *msg
= "archive_write_pax_header: "
919 "'x' header failed?! This can't happen.\n";
920 write(2, msg
, strlen(msg
));
923 r
= (a
->compressor
.write
)(a
, paxbuff
, 512);
924 if (r
!= ARCHIVE_OK
) {
925 pax
->entry_bytes_remaining
= 0;
926 pax
->entry_padding
= 0;
927 return (ARCHIVE_FATAL
);
930 pax
->entry_bytes_remaining
= archive_strlen(&(pax
->pax_header
));
931 pax
->entry_padding
= 0x1ff & (-(int64_t)pax
->entry_bytes_remaining
);
933 r
= (a
->compressor
.write
)(a
, pax
->pax_header
.s
,
934 archive_strlen(&(pax
->pax_header
)));
935 if (r
!= ARCHIVE_OK
) {
936 /* If a write fails, we're pretty much toast. */
937 return (ARCHIVE_FATAL
);
939 /* Pad out the end of the entry. */
940 r
= write_nulls(a
, pax
->entry_padding
);
941 if (r
!= ARCHIVE_OK
) {
942 /* If a write fails, we're pretty much toast. */
943 return (ARCHIVE_FATAL
);
945 pax
->entry_bytes_remaining
= pax
->entry_padding
= 0;
948 /* Write the header for main entry. */
949 r
= (a
->compressor
.write
)(a
, ustarbuff
, 512);
954 * Inform the client of the on-disk size we're using, so
955 * they can avoid unnecessarily writing a body for something
956 * that we're just going to ignore.
958 archive_entry_set_size(entry_original
, archive_entry_size(entry_main
));
959 pax
->entry_bytes_remaining
= archive_entry_size(entry_main
);
960 pax
->entry_padding
= 0x1ff & (-(int64_t)pax
->entry_bytes_remaining
);
961 archive_entry_free(entry_main
);
967 * We need a valid name for the regular 'ustar' entry. This routine
968 * tries to hack something more-or-less reasonable.
970 * The approach here tries to preserve leading dir names. We do so by
971 * working with four sections:
972 * 1) "prefix" directory names,
973 * 2) "suffix" directory names,
974 * 3) inserted dir name (optional),
977 * These sections must satisfy the following requirements:
978 * * Parts 1 & 2 together form an initial portion of the dir name.
979 * * Part 3 is specified by the caller. (It should not contain a leading
981 * * Part 4 forms an initial portion of the base filename.
982 * * The filename must be <= 99 chars to fit the ustar 'name' field.
983 * * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld.
984 * * Part 1 must be <= 155 chars to fit the ustar 'prefix' field.
985 * * If the original name ends in a '/', the new name must also end in a '/'
986 * * Trailing '/.' sequences may be stripped.
988 * Note: Recall that the ustar format does not store the '/' separating
989 * parts 1 & 2, but does store the '/' separating parts 2 & 3.
992 build_ustar_entry_name(char *dest
, const char *src
, size_t src_length
,
995 const char *prefix
, *prefix_end
;
996 const char *suffix
, *suffix_end
;
997 const char *filename
, *filename_end
;
999 int need_slash
= 0; /* Was there a trailing slash? */
1000 size_t suffix_length
= 99;
1003 /* Length of additional dir element to be added. */
1007 /* +2 here allows for '/' before and after the insert. */
1008 insert_length
= strlen(insert
) + 2;
1010 /* Step 0: Quick bailout in a common case. */
1011 if (src_length
< 100 && insert
== NULL
) {
1012 strncpy(dest
, src
, src_length
);
1013 dest
[src_length
] = '\0';
1017 /* Step 1: Locate filename and enforce the length restriction. */
1018 filename_end
= src
+ src_length
;
1019 /* Remove trailing '/' chars and '/.' pairs. */
1021 if (filename_end
> src
&& filename_end
[-1] == '/') {
1023 need_slash
= 1; /* Remember to restore trailing '/'. */
1026 if (filename_end
> src
+ 1 && filename_end
[-1] == '.'
1027 && filename_end
[-2] == '/') {
1029 need_slash
= 1; /* "foo/." will become "foo/" */
1036 /* Find start of filename. */
1037 filename
= filename_end
- 1;
1038 while ((filename
> src
) && (*filename
!= '/'))
1040 if ((*filename
== '/') && (filename
< filename_end
- 1))
1042 /* Adjust filename_end so that filename + insert fits in 99 chars. */
1043 suffix_length
-= insert_length
;
1044 if (filename_end
> filename
+ suffix_length
)
1045 filename_end
= filename
+ suffix_length
;
1046 /* Calculate max size for "suffix" section (#3 above). */
1047 suffix_length
-= filename_end
- filename
;
1049 /* Step 2: Locate the "prefix" section of the dirname, including
1052 prefix_end
= prefix
+ 155;
1053 if (prefix_end
> filename
)
1054 prefix_end
= filename
;
1055 while (prefix_end
> prefix
&& *prefix_end
!= '/')
1057 if ((prefix_end
< filename
) && (*prefix_end
== '/'))
1060 /* Step 3: Locate the "suffix" section of the dirname,
1061 * including trailing '/'. */
1062 suffix
= prefix_end
;
1063 suffix_end
= suffix
+ suffix_length
; /* Enforce limit. */
1064 if (suffix_end
> filename
)
1065 suffix_end
= filename
;
1066 if (suffix_end
< suffix
)
1067 suffix_end
= suffix
;
1068 while (suffix_end
> suffix
&& *suffix_end
!= '/')
1070 if ((suffix_end
< filename
) && (*suffix_end
== '/'))
1073 /* Step 4: Build the new name. */
1074 /* The OpenBSD strlcpy function is safer, but less portable. */
1075 /* Rather than maintain two versions, just use the strncpy version. */
1077 if (prefix_end
> prefix
) {
1078 strncpy(p
, prefix
, prefix_end
- prefix
);
1079 p
+= prefix_end
- prefix
;
1081 if (suffix_end
> suffix
) {
1082 strncpy(p
, suffix
, suffix_end
- suffix
);
1083 p
+= suffix_end
- suffix
;
1085 if (insert
!= NULL
) {
1086 /* Note: assume insert does not have leading or trailing '/' */
1088 p
+= strlen(insert
);
1091 strncpy(p
, filename
, filename_end
- filename
);
1092 p
+= filename_end
- filename
;
/*
 * Build the name used in the ustar header that carries the pax
 * extended attributes.
 *
 * SUSv3 requires 'dirname'/PaxHeader.'pid'/'filename', where 'pid' is
 * the PID of the archiving process.  That makes testing painful because
 * the output differs on every run, so for now this uses the simpler
 * 'dirname'/PaxHeader/'filename'.  (Someday this should be settable:
 * the SUS recommendation can then be the default and test harnesses
 * can override it to get predictable results.)
 *
 * Joerg Schilling has argued that this is unnecessary because, in
 * practice, if the pax extended attributes get extracted as regular
 * files, no one is going to bother reading those attributes to
 * manually restore them.  Based on this, 'star' uses
 * /tmp/PaxHeader/'basename' as the ustar header name.  That is a
 * tempting argument, in part because it is simpler than the SUSv3
 * recommendation, but it is not entirely convincing, and "/tmp" is a
 * Unix-ism.
 *
 * This routine relies on build_ustar_entry_name() for the general
 * case; it only supplies the extra path element and handles a few
 * pathological inputs.
 *
 * dest: output buffer (assumed large enough for the result; confirm
 *       against the callers).
 * src:  original entry pathname; may be NULL or empty.
 *
 * Returns dest.
 */
static char *
build_pax_attribute_name(char *dest, const char *src)
{
	char pax_dir[64];
	const char *tail;

	/* No name at all: use a fixed placeholder. */
	if (src == NULL || *src == '\0') {
		strcpy(dest, "PaxHeader/blank");
		return (dest);
	}

	/*
	 * Walk 'tail' back from the end of the string, discarding
	 * every trailing "/" and every trailing "/." element.
	 */
	tail = src + strlen(src);
	while (tail > src) {
		if (tail[-1] == '/') {
			/* Ends in "/": drop the '/'. */
			--tail;
		} else if (tail > src + 1 && tail[-1] == '.'
		    && tail[-2] == '/') {
			/* Ends in "/.": drop the '.'. */
			--tail;
		} else {
			break;
		}
	}

	/*
	 * Nothing survived the pruning.  This covers "/", "/.",
	 * "/./.", "/.//./." and similar.
	 */
	if (tail == src) {
		strcpy(dest, "/PaxHeader/rootdir");
		return (dest);
	}

	/* A bare "." gets its own fixed name. */
	if (*src == '.' && tail == src + 1) {
		strcpy(dest, "PaxHeader/currentdir");
		return (dest);
	}

	/*
	 * TODO: Store this string in the 'pax' structure so it is not
	 * recomputed for every entry.  That would also let clients
	 * override it.
	 */
#if HAVE_GETPID && 0  /* Disabled for now; see the comment above this function. */
	sprintf(pax_dir, "PaxHeader.%d", getpid());
#else
	/* Without a usable pid, leave it out of the directory name. */
	strcpy(pax_dir, "PaxHeader");
#endif
	/* General case: ustar-compatible name with "/PaxHeader/" inserted. */
	build_ustar_entry_name(dest, src, tail - src, pax_dir);

	return (dest);
}
1182 /* Write two null blocks for the end of archive */
1184 archive_write_pax_finish(struct archive_write
*a
)
1189 if (a
->compressor
.write
== NULL
)
1190 return (ARCHIVE_OK
);
1192 pax
= (struct pax
*)a
->format_data
;
1193 r
= write_nulls(a
, 512 * 2);
1198 archive_write_pax_destroy(struct archive_write
*a
)
1202 pax
= (struct pax
*)a
->format_data
;
1203 archive_string_free(&pax
->pax_header
);
1205 a
->format_data
= NULL
;
1206 return (ARCHIVE_OK
);
1210 archive_write_pax_finish_entry(struct archive_write
*a
)
1215 pax
= (struct pax
*)a
->format_data
;
1216 ret
= write_nulls(a
, pax
->entry_bytes_remaining
+ pax
->entry_padding
);
1217 pax
->entry_bytes_remaining
= pax
->entry_padding
= 0;
1222 write_nulls(struct archive_write
*a
, size_t padding
)
1226 while (padding
> 0) {
1227 to_write
= padding
< a
->null_length
? padding
: a
->null_length
;
1228 ret
= (a
->compressor
.write
)(a
, a
->nulls
, to_write
);
1229 if (ret
!= ARCHIVE_OK
)
1231 padding
-= to_write
;
1233 return (ARCHIVE_OK
);
1237 archive_write_pax_data(struct archive_write
*a
, const void *buff
, size_t s
)
1242 pax
= (struct pax
*)a
->format_data
;
1243 if (s
> pax
->entry_bytes_remaining
)
1244 s
= pax
->entry_bytes_remaining
;
1246 ret
= (a
->compressor
.write
)(a
, buff
, s
);
1247 pax
->entry_bytes_remaining
-= s
;
1248 if (ret
== ARCHIVE_OK
)
/*
 * Report whether a wide string contains any character outside the
 * 7-bit ASCII range.
 *
 * Returns 1 if wp is NULL or if some character before the terminator
 * has a value outside 0..127; returns 0 otherwise (including for the
 * empty string).
 *
 * Each character is compared as an unsigned value: where wchar_t is a
 * signed type, a negative element would otherwise compare as "< 128"
 * and be mistaken for ASCII.
 */
static int
has_non_ASCII(const wchar_t *wp)
{
	if (wp == NULL)
		return (1);
	while (*wp != L'\0' && (unsigned long)*wp < 128)
		wp++;
	return (*wp != L'\0');
}
/*
 * Used by extended attribute support; encodes the name
 * so that there will be no '=' characters in the result.
 *
 * Every byte below 33 or above 126, as well as '%' and '=', is
 * replaced by '%' followed by two uppercase hex digits; all other
 * bytes are copied unchanged.
 *
 * Returns a null-terminated string allocated with malloc(), which the
 * caller must free, or NULL if the allocation fails.
 */
static char *
url_encode(const char *in)
{
	static const char hex[] = "0123456789ABCDEF";
	const unsigned char *s;
	char *d;
	size_t out_len = 0;
	char *out;

	/*
	 * First pass: compute the encoded length.  Bytes are examined as
	 * unsigned char so the range test and the shifts below do not
	 * depend on whether plain char is signed.
	 */
	for (s = (const unsigned char *)in; *s != '\0'; s++) {
		if (*s < 33 || *s > 126 || *s == '%' || *s == '=')
			out_len += 3;
		else
			out_len++;
	}

	out = (char *)malloc(out_len + 1);
	if (out == NULL)
		return (NULL);

	/* Second pass: emit the encoded bytes. */
	for (s = (const unsigned char *)in, d = out; *s != '\0'; s++) {
		/* encode any non-printable ASCII character or '%' or '=' */
		if (*s < 33 || *s > 126 || *s == '%' || *s == '=') {
			/* URL encoding is '%' followed by two hex digits */
			*d++ = '%';
			*d++ = hex[(*s >> 4) & 0x0f];
			*d++ = hex[*s & 0x0f];
		} else {
			*d++ = (char)*s;
		}
	}
	*d = '\0';
	return (out);
}
1303 * Encode a sequence of bytes into a C string using base-64 encoding.
1305 * Returns a null-terminated C string allocated with malloc(); caller
1306 * is responsible for freeing the result.
1309 base64_encode(const char *s
, size_t len
)
1311 static const char digits
[64] =
1312 { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
1313 'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d',
1314 'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s',
1315 't','u','v','w','x','y','z','0','1','2','3','4','5','6','7',
1320 /* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
1321 out
= (char *)malloc((len
* 4 + 2) / 3 + 1);
1326 /* Convert each group of 3 bytes into 4 characters. */
1328 v
= (((int)s
[0] << 16) & 0xff0000)
1329 | (((int)s
[1] << 8) & 0xff00)
1330 | (((int)s
[2]) & 0x00ff);
1333 *d
++ = digits
[(v
>> 18) & 0x3f];
1334 *d
++ = digits
[(v
>> 12) & 0x3f];
1335 *d
++ = digits
[(v
>> 6) & 0x3f];
1336 *d
++ = digits
[(v
) & 0x3f];
1338 /* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
1342 v
= (((int)s
[0] << 16) & 0xff0000);
1343 *d
++ = digits
[(v
>> 18) & 0x3f];
1344 *d
++ = digits
[(v
>> 12) & 0x3f];
1347 v
= (((int)s
[0] << 16) & 0xff0000)
1348 | (((int)s
[1] << 8) & 0xff00);
1349 *d
++ = digits
[(v
>> 18) & 0x3f];
1350 *d
++ = digits
[(v
>> 12) & 0x3f];
1351 *d
++ = digits
[(v
>> 6) & 0x3f];
1354 /* Add trailing NUL character so output is a valid C string. */