2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_write_set_format_pax.c,v 1.41 2007/05/29 01:00:19 kientzle Exp $");
40 #include "archive_entry.h"
41 #include "archive_private.h"
42 #include "archive_write_private.h"
/*
 * Per-archive state of the pax writer (kept in a->format_data):
 *   entry_bytes_remaining - body bytes still expected for the entry
 *                           currently being written
 *   entry_padding         - NUL bytes needed after the body to reach
 *                           the next 512-byte boundary
 *   pax_header            - work area in which the extended attribute
 *                           records for an entry are assembled
 */
45 uint64_t entry_bytes_remaining
;
46 uint64_t entry_padding
;
47 struct archive_string pax_header
;
/*
 * Forward declarations for the file-local helpers and format callbacks
 * that are defined further down in this file.
 */
50 static void add_pax_attr(struct archive_string
*, const char *key
,
52 static void add_pax_attr_int(struct archive_string
*,
53 const char *key
, int64_t value
);
54 static void add_pax_attr_time(struct archive_string
*,
55 const char *key
, int64_t sec
,
57 static void add_pax_attr_w(struct archive_string
*,
58 const char *key
, const wchar_t *wvalue
);
59 static ssize_t
archive_write_pax_data(struct archive_write
*,
60 const void *, size_t);
61 static int archive_write_pax_finish(struct archive_write
*);
62 static int archive_write_pax_destroy(struct archive_write
*);
63 static int archive_write_pax_finish_entry(struct archive_write
*);
64 static int archive_write_pax_header(struct archive_write
*,
65 struct archive_entry
*);
66 static char *base64_encode(const char *src
, size_t len
);
67 static char *build_pax_attribute_name(char *dest
, const char *src
);
68 static char *build_ustar_entry_name(char *dest
, const char *src
,
69 size_t src_length
, const char *insert
);
70 static char *format_int(char *dest
, int64_t);
71 static int has_non_ASCII(const wchar_t *);
72 static char *url_encode(const char *in
);
73 static int write_nulls(struct archive_write
*, size_t);
76 * Set output format to 'restricted pax' format.
78 * This is the same as normal 'pax', but tries to suppress
79 * the pax header whenever possible. This is the default for
80 * bsdtar, for instance.
/*
 * Select the "restricted pax" writer.  The work is delegated to
 * archive_write_set_format_pax(); afterwards the format code and the
 * format name are overridden with the restricted-pax values.  The
 * overrides are applied directly after the delegated call, with no
 * check of its result in between.
 */
83 archive_write_set_format_pax_restricted(struct archive
*_a
)
85 struct archive_write
*a
= (struct archive_write
*)_a
;
87 r
= archive_write_set_format_pax(&a
->archive
);
88 a
->archive_format
= ARCHIVE_FORMAT_TAR_PAX_RESTRICTED
;
89 a
->archive_format_name
= "restricted POSIX pax interchange";
94 * Set output format to 'pax' format.
/*
 * Install the pax writer on an archive handle.
 *
 * Any previously installed format is released through its
 * format_destroy callback.  A struct pax is then allocated, zeroed and
 * stored in a->format_data, and the header/data/finish/destroy/
 * finish_entry callbacks, the format code and the format name are
 * filled in.  If the allocation fails, an ENOMEM error is recorded on
 * the archive and ARCHIVE_FATAL is returned.
 */
97 archive_write_set_format_pax(struct archive
*_a
)
99 struct archive_write
*a
= (struct archive_write
*)_a
;
102 if (a
->format_destroy
!= NULL
)
103 (a
->format_destroy
)(a
);
105 pax
= (struct pax
*)malloc(sizeof(*pax
));
107 archive_set_error(&a
->archive
, ENOMEM
, "Can't allocate pax data");
108 return (ARCHIVE_FATAL
);
110 memset(pax
, 0, sizeof(*pax
));
111 a
->format_data
= pax
;
113 a
->pad_uncompressed
= 1;
114 a
->format_write_header
= archive_write_pax_header
;
115 a
->format_write_data
= archive_write_pax_data
;
116 a
->format_finish
= archive_write_pax_finish
;
117 a
->format_destroy
= archive_write_pax_destroy
;
118 a
->format_finish_entry
= archive_write_pax_finish_entry
;
119 a
->archive_format
= ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE
;
120 a
->archive_format_name
= "POSIX pax interchange";
125 * Note: This code assumes that 'nanos' has the same sign as 'sec',
126 * which implies that sec=-1, nanos=200000000 represents -1.2 seconds
127 * and not -0.8 seconds. This is a pretty pedantic point, as we're
128 * unlikely to encounter many real files created before Jan 1, 1970,
129 * much less ones with timestamps recorded to sub-second resolution.
/*
 * Append a time-valued attribute to the pax header string 'as'.
 *
 * The text is assembled backwards from the end of a local buffer:
 * first the fractional digits taken from 'nanos' (trailing zeros are
 * skipped and the fraction is only emitted when it is non-zero), then
 * the whole seconds via format_int().  The finished text is handed to
 * add_pax_attr() together with 'key'.
 */
132 add_pax_attr_time(struct archive_string
*as
, const char *key
,
133 int64_t sec
, unsigned long nanos
)
138 * Note that each byte contributes fewer than 3 base-10
139 * digits, so this will always be big enough.
141 char tmp
[1 + 3*sizeof(sec
) + 1 + 3*sizeof(nanos
)];
143 tmp
[sizeof(tmp
) - 1] = 0;
144 t
= tmp
+ sizeof(tmp
) - 1;
146 /* Skip trailing zeros in the fractional part. */
147 for (digit
= 0, i
= 10; i
> 0 && digit
== 0; i
--) {
152 /* Only format the fraction if it's non-zero. */
155 *--t
= "0123456789"[digit
];
162 t
= format_int(t
, sec
);
164 add_pax_attr(as
, key
, t
);
/*
 * Format a signed 64-bit integer as base-10 text, writing backwards.
 *
 * 't' points just past the place where the last digit must go
 * (callers pass the address of the NUL terminator they have already
 * stored at the end of their work area).  Returns a pointer to the
 * first character written, which is the '-' sign for negative values.
 * Up to 20 characters (19 digits plus sign) are written in front of
 * 't'.
 */
static char *
format_int(char *t, int64_t i)
{
	uint64_t magnitude;

	/*
	 * Work on the magnitude as an unsigned value.  Negating in the
	 * unsigned domain is well defined even for INT64_MIN, whose
	 * magnitude cannot be represented as a positive int64_t, and it
	 * guarantees that the digit-table index is never negative.
	 */
	if (i < 0)
		magnitude = (uint64_t)0 - (uint64_t)i;
	else
		magnitude = (uint64_t)i;

	do {
		*--t = "0123456789"[magnitude % 10];
	} while ((magnitude /= 10) != 0);

	if (i < 0)
		*--t = '-';
	return (t);
}
/*
 * Append an integer-valued attribute to the pax header string 'as'.
 * 'value' is formatted with format_int() into a local buffer (sized at
 * three characters per byte of 'value' plus one for the terminator)
 * and the resulting text is passed to add_pax_attr() with 'key'.
 */
187 add_pax_attr_int(struct archive_string
*as
, const char *key
, int64_t value
)
189 char tmp
[1 + 3 * sizeof(value
)];
191 tmp
[sizeof(tmp
) - 1] = 0;
192 add_pax_attr(as
, key
, format_int(tmp
+ sizeof(tmp
) - 1, value
));
/*
 * Convert a NUL-terminated wide-character string into a newly
 * malloc()ed, NUL-terminated UTF-8 string.  The caller owns the result
 * and must free() it.
 *
 * Values up to 0x7fffffff are encoded with the original 1- to 6-byte
 * UTF-8 forms; larger values cannot be encoded and are skipped.  If
 * the allocation fails, __archive_errx() is called and NULL is
 * returned should that call come back.
 */
static char *
utf8_encode(const wchar_t *wval)
{
	size_t utf8len;
	const wchar_t *wp;
	unsigned long wc;
	char *utf8_value, *p;

	/* Pass 1: count the bytes the encoded string will occupy. */
	utf8len = 0;
	for (wp = wval; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f)
			utf8len++;
		else if (wc <= 0x7ff)
			utf8len += 2;
		else if (wc <= 0xffff)
			utf8len += 3;
		else if (wc <= 0x1fffff)
			utf8len += 4;
		else if (wc <= 0x3ffffff)
			utf8len += 5;
		else if (wc <= 0x7fffffff)
			utf8len += 6;
		/* Ignore larger values; UTF-8 can't encode them. */
	}

	utf8_value = (char *)malloc(utf8len + 1);
	if (utf8_value == NULL) {
		__archive_errx(1, "Not enough memory for attributes");
		return (NULL);
	}

	/*
	 * Pass 2: emit the bytes.  The thresholds must mirror pass 1
	 * exactly so that the buffer sized above is filled precisely.
	 */
	for (wp = wval, p = utf8_value; *wp != L'\0'; ) {
		wc = *wp++;
		if (wc <= 0x7f) {
			*p++ = (char)wc;
		} else if (wc <= 0x7ff) {
			p[0] = (char)(0xc0 | ((wc >> 6) & 0x1f));
			p[1] = (char)(0x80 | (wc & 0x3f));
			p += 2;
		} else if (wc <= 0xffff) {
			p[0] = (char)(0xe0 | ((wc >> 12) & 0x0f));
			p[1] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[2] = (char)(0x80 | (wc & 0x3f));
			p += 3;
		} else if (wc <= 0x1fffff) {
			p[0] = (char)(0xf0 | ((wc >> 18) & 0x07));
			p[1] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[3] = (char)(0x80 | (wc & 0x3f));
			p += 4;
		} else if (wc <= 0x3ffffff) {
			p[0] = (char)(0xf8 | ((wc >> 24) & 0x03));
			p[1] = (char)(0x80 | ((wc >> 18) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[3] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[4] = (char)(0x80 | (wc & 0x3f));
			p += 5;
		} else if (wc <= 0x7fffffff) {
			/*
			 * Six-byte form: one lead byte followed by five
			 * continuation bytes at indices 1..5.  (The
			 * continuation bytes used to be stored at indices
			 * 1,1,2,3,4, which clobbered p[1] and left p[5]
			 * unwritten.)
			 */
			p[0] = (char)(0xfc | ((wc >> 30) & 0x01));
			p[1] = (char)(0x80 | ((wc >> 24) & 0x3f));
			p[2] = (char)(0x80 | ((wc >> 18) & 0x3f));
			p[3] = (char)(0x80 | ((wc >> 12) & 0x3f));
			p[4] = (char)(0x80 | ((wc >> 6) & 0x3f));
			p[5] = (char)(0x80 | (wc & 0x3f));
			p += 6;
		}
		/* Ignore larger values; UTF-8 can't encode them. */
	}
	*p = '\0';

	return (utf8_value);
}
/*
 * Append a wide-string attribute to the pax header string 'as'.
 * 'wval' is converted with utf8_encode(); the result is checked
 * against NULL and then passed to add_pax_attr() together with 'key'.
 */
270 add_pax_attr_w(struct archive_string
*as
, const char *key
, const wchar_t *wval
)
272 char *utf8_value
= utf8_encode(wval
);
273 if (utf8_value
== NULL
)
275 add_pax_attr(as
, key
, utf8_value
);
280 * Add a key/value attribute to the pax header. This function handles
281 * the length field and various other syntactic requirements.
/*
 * Append one attribute record to the pax header string 'as'.
 *
 * A record has the form "<len> <key>=<value>\n", where <len> is the
 * decimal length of the entire record, including the digits of <len>
 * itself.  The length without the <len> field is computed first, then
 * the number of digits is derived from it and bumped by one when
 * adding those digits pushes the total up to the next power of ten.
 * The record is then appended piece by piece: length, space, key,
 * '=', value, newline.
 */
284 add_pax_attr(struct archive_string
*as
, const char *key
, const char *value
)
286 int digits
, i
, len
, next_ten
;
287 char tmp
[1 + 3 * sizeof(int)]; /* < 3 base-10 digits per byte */
290 * PAX attributes have the following layout:
291 * <len> <space> <key> <=> <value> <nl>
293 len
= 1 + strlen(key
) + 1 + strlen(value
) + 1;
296 * The <len> field includes the length of the <len> field, so
297 * computing the correct length is tricky. I start by
298 * counting the number of base-10 digits in 'len' and
299 * computing the next higher power of 10.
307 next_ten
= next_ten
* 10;
310 * For example, if string without the length field is 99
311 * chars, then adding the 2 digit length "99" will force the
312 * total length past 100, requiring an extra digit. The next
313 * statement adjusts for this effect.
315 if (len
+ digits
>= next_ten
)
318 /* Now, we have the right length so we can build the line. */
319 tmp
[sizeof(tmp
) - 1] = 0; /* Null-terminate the work area. */
320 archive_strcat(as
, format_int(tmp
+ sizeof(tmp
) - 1, len
+ digits
));
321 archive_strappend_char(as
, ' ');
322 archive_strcat(as
, key
);
323 archive_strappend_char(as
, '=');
324 archive_strcat(as
, value
);
325 archive_strappend_char(as
, '\n');
/*
 * Add the entry's extended attributes to pax->pax_header.
 *
 * For each attribute fetched with archive_entry_xattr_next(), the name
 * is URL-encoded, widened with mbstowcs() and UTF-8 encoded, and the
 * value is base64-encoded.  When both encodings produced a result, a
 * record keyed "LIBARCHIVE.xattr.<encoded name>" is appended through
 * add_pax_attr().
 *
 * NOTE(review): the return value of mbstowcs() is not examined; the
 * wide buffer is simply terminated at index wcs_length afterwards.
 */
329 archive_write_pax_header_xattrs(struct pax
*pax
, struct archive_entry
*entry
)
331 struct archive_string s
;
332 int i
= archive_entry_xattr_reset(entry
);
338 char *url_encoded_name
= NULL
, *encoded_name
= NULL
;
339 wchar_t *wcs_name
= NULL
;
342 archive_entry_xattr_next(entry
, &name
, &value
, &size
);
343 /* Name is URL-encoded, then converted to wchar_t,
344 * then UTF-8 encoded. */
345 url_encoded_name
= url_encode(name
);
346 if (url_encoded_name
!= NULL
) {
347 /* Convert narrow-character to wide-character. */
348 int wcs_length
= strlen(url_encoded_name
);
349 wcs_name
= (wchar_t *)malloc((wcs_length
+ 1) * sizeof(wchar_t));
350 if (wcs_name
== NULL
)
351 __archive_errx(1, "No memory for xattr conversion");
352 mbstowcs(wcs_name
, url_encoded_name
, wcs_length
);
353 wcs_name
[wcs_length
] = 0;
354 free(url_encoded_name
); /* Done with this. */
356 if (wcs_name
!= NULL
) {
357 encoded_name
= utf8_encode(wcs_name
);
358 free(wcs_name
); /* Done with wchar_t name. */
361 encoded_value
= base64_encode((const char *)value
, size
);
363 if (encoded_name
!= NULL
&& encoded_value
!= NULL
) {
364 archive_string_init(&s
);
365 archive_strcpy(&s
, "LIBARCHIVE.xattr.");
366 archive_strcat(&s
, encoded_name
);
367 add_pax_attr(&(pax
->pax_header
), s
.s
, encoded_value
);
368 archive_string_free(&s
);
376 * TODO: Consider adding 'comment' and 'charset' fields to
377 * archive_entry so that clients can specify them. Also, consider
378 * adding generic key/value tags so clients can add arbitrary
/*
 * Write the header(s) for one archive entry in pax format.
 *
 * Outline, as far as it is visible in this listing:
 *  1. For non-hardlink entries the file type is validated; directory
 *     pathnames get a trailing '/' (the caller's entry is modified),
 *     and unsupported types are reported with ARCHIVE_WARN.
 *  2. The entry is cloned into entry_main so it can be altered, and
 *     the pax_header work area is emptied.
 *  3. Each field that does not fit the ustar header (long or
 *     non-ASCII path / link target / user and group names, large
 *     size, uid, gid, device numbers) is added as a pax attribute.
 *  4. Unless the format is "restricted pax" with no extension needed,
 *     timestamps, file flags, ACLs, SCHILY.dev/ino/nlink and extended
 *     attributes are added as well.
 *  5. The body size is forced to 0 for non-regular files and for
 *     hardlinks.
 *  6. The ustar header for the main entry is formatted.  If any
 *     attributes were collected, an 'x' header, the attribute text
 *     and its padding are written first; then the main header is
 *     written.
 *  7. The on-disk size is copied back into entry_original, and
 *     entry_bytes_remaining / entry_padding are set for the body.
 *
 * NOTE(review): the directory branch evaluates p[strlen(p) - 1]; for
 * an empty pathname that reads one byte before the string.
 * NOTE(review): the ARCHIVE_FATAL returns taken after a failed write
 * happen after entry_main was cloned, and the only visible
 * archive_entry_free(entry_main) is at the end of the function, so
 * those paths look like they leak the clone -- confirm.
 */
382 archive_write_pax_header(struct archive_write
*a
,
383 struct archive_entry
*entry_original
)
385 struct archive_entry
*entry_main
;
386 const char *linkname
, *p
;
388 const char *hardlink
;
390 const char *suffix_start
;
391 int need_extension
, r
, ret
;
396 char ustar_entry_name
[256];
397 char pax_entry_name
[256];
400 pax
= (struct pax
*)a
->format_data
;
402 hardlink
= archive_entry_hardlink(entry_original
);
404 /* Make sure this is a type of entry that we can handle here */
405 if (hardlink
== NULL
) {
406 switch (archive_entry_filetype(entry_original
)) {
415 * Ensure a trailing '/'. Modify the original
416 * entry so the client sees the change.
418 p
= archive_entry_pathname(entry_original
);
419 if (p
[strlen(p
) - 1] != '/') {
420 t
= (char *)malloc(strlen(p
) + 2);
422 archive_set_error(&a
->archive
, ENOMEM
,
423 "Can't allocate pax data");
424 return(ARCHIVE_FATAL
);
428 archive_entry_copy_pathname(entry_original
, t
);
433 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
434 "tar format cannot archive this (type=0%lo)",
435 (unsigned long)archive_entry_filetype(entry_original
));
436 return (ARCHIVE_WARN
);
440 /* Copy entry so we can modify it as needed. */
441 entry_main
= archive_entry_clone(entry_original
);
442 archive_string_empty(&(pax
->pax_header
)); /* Blank our work area. */
445 * Determining whether or not the name is too big is ugly
446 * because of the rules for dividing names between 'name' and
447 * 'prefix' fields. Here, I pick out the longest possible
448 * suffix, then test whether the remaining prefix is too long.
450 wp
= archive_entry_pathname_w(entry_main
);
451 p
= archive_entry_pathname(entry_main
);
452 if (strlen(p
) <= 100) /* Short enough for just 'name' field */
453 suffix_start
= p
; /* Record a zero-length prefix */
455 /* Find the largest suffix that fits in 'name' field. */
456 suffix_start
= strchr(p
+ strlen(p
) - 100 - 1, '/');
459 * If name is too long, or has non-ASCII characters, add
460 * 'path' to pax extended attrs.
462 if (suffix_start
== NULL
|| suffix_start
- p
> 155 || has_non_ASCII(wp
)) {
463 add_pax_attr_w(&(pax
->pax_header
), "path", wp
);
464 archive_entry_set_pathname(entry_main
,
465 build_ustar_entry_name(ustar_entry_name
, p
, strlen(p
), NULL
));
469 /* If link name is too long or has non-ASCII characters, add
470 * 'linkpath' to pax extended attrs. */
472 if (linkname
== NULL
)
473 linkname
= archive_entry_symlink(entry_main
);
475 if (linkname
!= NULL
) {
476 /* There is a link name, get the wide version as well. */
477 if (hardlink
!= NULL
)
478 wp
= archive_entry_hardlink_w(entry_main
);
480 wp
= archive_entry_symlink_w(entry_main
);
482 /* If the link is long or has a non-ASCII character,
483 * store it as a pax extended attribute. */
484 if (strlen(linkname
) > 100 || has_non_ASCII(wp
)) {
485 add_pax_attr_w(&(pax
->pax_header
), "linkpath", wp
);
486 if (hardlink
!= NULL
)
487 archive_entry_set_hardlink(entry_main
,
488 "././@LongHardLink");
490 archive_entry_set_symlink(entry_main
,
496 /* If file size is too large, add 'size' to pax extended attrs. */
497 if (archive_entry_size(entry_main
) >= (((int64_t)1) << 33)) {
498 add_pax_attr_int(&(pax
->pax_header
), "size",
499 archive_entry_size(entry_main
));
503 /* If numeric GID is too large, add 'gid' to pax extended attrs. */
504 if (archive_entry_gid(entry_main
) >= (1 << 18)) {
505 add_pax_attr_int(&(pax
->pax_header
), "gid",
506 archive_entry_gid(entry_main
));
510 /* If group name is too large or has non-ASCII characters, add
511 * 'gname' to pax extended attrs. */
512 p
= archive_entry_gname(entry_main
);
513 wp
= archive_entry_gname_w(entry_main
);
514 if (p
!= NULL
&& (strlen(p
) > 31 || has_non_ASCII(wp
))) {
515 add_pax_attr_w(&(pax
->pax_header
), "gname", wp
);
516 archive_entry_set_gname(entry_main
, NULL
);
520 /* If numeric UID is too large, add 'uid' to pax extended attrs. */
521 if (archive_entry_uid(entry_main
) >= (1 << 18)) {
522 add_pax_attr_int(&(pax
->pax_header
), "uid",
523 archive_entry_uid(entry_main
));
527 /* If user name is too large, add 'uname' to pax extended attrs. */
528 /* TODO: If uname has non-ASCII characters, use pax attribute. */
529 p
= archive_entry_uname(entry_main
);
530 wp
= archive_entry_uname_w(entry_main
);
531 if (p
!= NULL
&& (strlen(p
) > 31 || has_non_ASCII(wp
))) {
532 add_pax_attr_w(&(pax
->pax_header
), "uname", wp
);
533 archive_entry_set_uname(entry_main
, NULL
);
538 * POSIX/SUSv3 doesn't provide a standard key for large device
539 * numbers. I use the same keys here that Joerg Schilling
540 * used for 'star.' (Which, somewhat confusingly, are called
541 * "devXXX" even though they code "rdev" values.) No doubt,
542 * other implementations use other keys. Note that there's no
543 * reason we can't write the same information into a number of
546 * Of course, this is only needed for block or char device entries.
548 if (archive_entry_filetype(entry_main
) == AE_IFBLK
549 || archive_entry_filetype(entry_main
) == AE_IFCHR
) {
551 * If rdevmajor is too large, add 'SCHILY.devmajor' to
552 * extended attributes.
554 dev_t rdevmajor
, rdevminor
;
555 rdevmajor
= archive_entry_rdevmajor(entry_main
);
556 rdevminor
= archive_entry_rdevminor(entry_main
);
557 if (rdevmajor
>= (1 << 18)) {
558 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.devmajor",
561 * Non-strict formatting below means we don't
562 * have to truncate here. Not truncating improves
563 * the chance that some more modern tar archivers
564 * (such as GNU tar 1.13) can restore the full
565 * value even if they don't understand the pax
566 * extended attributes. See my rant below about
567 * file size fields for additional details.
569 /* archive_entry_set_rdevmajor(entry_main,
570 rdevmajor & ((1 << 18) - 1)); */
575 * If devminor is too large, add 'SCHILY.devminor' to
576 * extended attributes.
578 if (rdevminor
>= (1 << 18)) {
579 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.devminor",
581 /* Truncation is not necessary here, either. */
582 /* archive_entry_set_rdevminor(entry_main,
583 rdevminor & ((1 << 18) - 1)); */
589 * Technically, the mtime field in the ustar header can
590 * support 33 bits, but many platforms use signed 32-bit time
591 * values. The cutoff of 0x7fffffff here is a compromise.
592 * Yes, this check is duplicated just below; this helps to
593 * avoid writing an mtime attribute just to handle a
594 * high-resolution timestamp in "restricted pax" mode.
596 if (!need_extension
&&
597 ((archive_entry_mtime(entry_main
) < 0)
598 || (archive_entry_mtime(entry_main
) >= 0x7fffffff)))
601 /* I use a star-compatible file flag attribute. */
602 p
= archive_entry_fflags_text(entry_main
);
603 if (!need_extension
&& p
!= NULL
&& *p
!= '\0')
606 /* If there are non-trivial ACL entries, we need an extension. */
607 if (!need_extension
&& archive_entry_acl_count(entry_original
,
608 ARCHIVE_ENTRY_ACL_TYPE_ACCESS
) > 0)
611 /* If there are non-trivial ACL entries, we need an extension. */
612 if (!need_extension
&& archive_entry_acl_count(entry_original
,
613 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT
) > 0)
616 /* If there are extended attributes, we need an extension */
617 if (!need_extension
&& archive_entry_xattr_count(entry_original
) > 0)
621 * The following items are handled differently in "pax
622 * restricted" format. In particular, in "pax restricted"
623 * format they won't be added unless need_extension is
624 * already set (we're already generating an extended header, so
625 * may as well include these).
627 if (a
->archive_format
!= ARCHIVE_FORMAT_TAR_PAX_RESTRICTED
||
630 if (archive_entry_mtime(entry_main
) < 0 ||
631 archive_entry_mtime(entry_main
) >= 0x7fffffff ||
632 archive_entry_mtime_nsec(entry_main
) != 0)
633 add_pax_attr_time(&(pax
->pax_header
), "mtime",
634 archive_entry_mtime(entry_main
),
635 archive_entry_mtime_nsec(entry_main
));
637 if (archive_entry_ctime(entry_main
) != 0 ||
638 archive_entry_ctime_nsec(entry_main
) != 0)
639 add_pax_attr_time(&(pax
->pax_header
), "ctime",
640 archive_entry_ctime(entry_main
),
641 archive_entry_ctime_nsec(entry_main
));
643 if (archive_entry_atime(entry_main
) != 0 ||
644 archive_entry_atime_nsec(entry_main
) != 0)
645 add_pax_attr_time(&(pax
->pax_header
), "atime",
646 archive_entry_atime(entry_main
),
647 archive_entry_atime_nsec(entry_main
));
649 /* I use a star-compatible file flag attribute. */
650 p
= archive_entry_fflags_text(entry_main
);
651 if (p
!= NULL
&& *p
!= '\0')
652 add_pax_attr(&(pax
->pax_header
), "SCHILY.fflags", p
);
654 /* I use star-compatible ACL attributes. */
655 wp
= archive_entry_acl_text_w(entry_original
,
656 ARCHIVE_ENTRY_ACL_TYPE_ACCESS
|
657 ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID
);
658 if (wp
!= NULL
&& *wp
!= L
'\0')
659 add_pax_attr_w(&(pax
->pax_header
),
660 "SCHILY.acl.access", wp
);
661 wp
= archive_entry_acl_text_w(entry_original
,
662 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT
|
663 ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID
);
664 if (wp
!= NULL
&& *wp
!= L
'\0')
665 add_pax_attr_w(&(pax
->pax_header
),
666 "SCHILY.acl.default", wp
);
668 /* Include star-compatible metadata info. */
669 /* Note: "SCHILY.dev{major,minor}" are NOT the
670 * major/minor portions of "SCHILY.dev". */
671 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.dev",
672 archive_entry_dev(entry_main
));
673 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.ino",
674 archive_entry_ino(entry_main
));
675 add_pax_attr_int(&(pax
->pax_header
), "SCHILY.nlink",
676 archive_entry_nlink(entry_main
));
678 /* Store extended attributes */
679 archive_write_pax_header_xattrs(pax
, entry_original
);
682 /* Only regular files have data. */
683 if (archive_entry_filetype(entry_main
) != AE_IFREG
)
684 archive_entry_set_size(entry_main
, 0);
687 * Pax-restricted does not store data for hardlinks, in order
688 * to improve compatibility with ustar.
690 if (a
->archive_format
!= ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE
&&
692 archive_entry_set_size(entry_main
, 0);
695 * XXX Full pax interchange format does permit a hardlink
696 * entry to have data associated with it. I'm not supporting
697 * that here because the client expects me to tell them whether
698 * or not this format expects data for hardlinks. If I
699 * don't check here, then every pax archive will end up with
700 * duplicated data for hardlinks. Someday, there may be
701 * need to select this behavior, in which case the following
702 * will need to be revisited. XXX
704 if (hardlink
!= NULL
)
705 archive_entry_set_size(entry_main
, 0);
707 /* Format 'ustar' header for main entry.
709 * The trouble with file size: If the reader can't understand
710 * the file size, they may not be able to locate the next
711 * entry and the rest of the archive is toast. Pax-compliant
712 * readers are supposed to ignore the file size in the main
713 * header, so the question becomes how to maximize portability
714 * for readers that don't support pax attribute extensions.
715 * For maximum compatibility, I permit numeric extensions in
716 * the main header so that the file size stored will always be
717 * correct, even if it's in a format that only some
718 * implementations understand. The technique used here is:
720 * a) If possible, follow the standard exactly. This handles
721 * files up to 8 gigabytes minus 1.
723 * b) If that fails, try octal but omit the field terminator.
724 * That handles files up to 64 gigabytes minus 1.
726 * c) Otherwise, use base-256 extensions. That handles files
727 * up to 2^63 in this implementation, with the potential to
728 * go up to 2^94. That should hold us for a while. ;-)
730 * The non-strict formatter uses similar logic for other
731 * numeric fields, though they're less critical.
733 __archive_write_format_header_ustar(a
, ustarbuff
, entry_main
, -1, 0);
735 /* If we built any extended attributes, write that entry first. */
737 if (archive_strlen(&(pax
->pax_header
)) > 0) {
738 struct archive_entry
*pax_attr_entry
;
745 pax_attr_entry
= archive_entry_new();
746 p
= archive_entry_pathname(entry_main
);
747 archive_entry_set_pathname(pax_attr_entry
,
748 build_pax_attribute_name(pax_entry_name
, p
));
749 archive_entry_set_size(pax_attr_entry
,
750 archive_strlen(&(pax
->pax_header
)));
751 /* Copy uid/gid (but clip to ustar limits). */
752 uid
= archive_entry_uid(entry_main
);
755 archive_entry_set_uid(pax_attr_entry
, uid
);
756 gid
= archive_entry_gid(entry_main
);
759 archive_entry_set_gid(pax_attr_entry
, gid
);
760 /* Copy mode over (but not setuid/setgid bits) */
761 mode
= archive_entry_mode(entry_main
);
771 archive_entry_set_mode(pax_attr_entry
, mode
);
773 /* Copy uname/gname. */
774 archive_entry_set_uname(pax_attr_entry
,
775 archive_entry_uname(entry_main
));
776 archive_entry_set_gname(pax_attr_entry
,
777 archive_entry_gname(entry_main
));
779 /* Copy mtime, but clip to ustar limits. */
780 s
= archive_entry_mtime(entry_main
);
781 ns
= archive_entry_mtime_nsec(entry_main
);
782 if (s
< 0) { s
= 0; ns
= 0; }
783 if (s
> 0x7fffffff) { s
= 0x7fffffff; ns
= 0; }
784 archive_entry_set_mtime(pax_attr_entry
, s
, ns
);
786 /* Ditto for atime. */
787 s
= archive_entry_atime(entry_main
);
788 ns
= archive_entry_atime_nsec(entry_main
);
789 if (s
< 0) { s
= 0; ns
= 0; }
790 if (s
> 0x7fffffff) { s
= 0x7fffffff; ns
= 0; }
791 archive_entry_set_atime(pax_attr_entry
, s
, ns
);
793 /* Standard ustar doesn't support ctime. */
794 archive_entry_set_ctime(pax_attr_entry
, 0, 0);
796 ret
= __archive_write_format_header_ustar(a
, paxbuff
,
797 pax_attr_entry
, 'x', 1);
799 archive_entry_free(pax_attr_entry
);
801 /* Note that the 'x' header shouldn't ever fail to format */
803 const char *msg
= "archive_write_pax_header: "
804 "'x' header failed?! This can't happen.\n";
805 write(2, msg
, strlen(msg
));
808 r
= (a
->compressor
.write
)(a
, paxbuff
, 512);
809 if (r
!= ARCHIVE_OK
) {
810 pax
->entry_bytes_remaining
= 0;
811 pax
->entry_padding
= 0;
812 return (ARCHIVE_FATAL
);
815 pax
->entry_bytes_remaining
= archive_strlen(&(pax
->pax_header
));
816 pax
->entry_padding
= 0x1ff & (-(int64_t)pax
->entry_bytes_remaining
);
818 r
= (a
->compressor
.write
)(a
, pax
->pax_header
.s
,
819 archive_strlen(&(pax
->pax_header
)));
820 if (r
!= ARCHIVE_OK
) {
821 /* If a write fails, we're pretty much toast. */
822 return (ARCHIVE_FATAL
);
824 /* Pad out the end of the entry. */
825 r
= write_nulls(a
, pax
->entry_padding
);
826 if (r
!= ARCHIVE_OK
) {
827 /* If a write fails, we're pretty much toast. */
828 return (ARCHIVE_FATAL
);
830 pax
->entry_bytes_remaining
= pax
->entry_padding
= 0;
833 /* Write the header for main entry. */
834 r
= (a
->compressor
.write
)(a
, ustarbuff
, 512);
839 * Inform the client of the on-disk size we're using, so
840 * they can avoid unnecessarily writing a body for something
841 * that we're just going to ignore.
843 archive_entry_set_size(entry_original
, archive_entry_size(entry_main
));
844 pax
->entry_bytes_remaining
= archive_entry_size(entry_main
);
845 pax
->entry_padding
= 0x1ff & (-(int64_t)pax
->entry_bytes_remaining
);
846 archive_entry_free(entry_main
);
852 * We need a valid name for the regular 'ustar' entry. This routine
853 * tries to hack something more-or-less reasonable.
855 * The approach here tries to preserve leading dir names. We do so by
856 * working with four sections:
857 * 1) "prefix" directory names,
858 * 2) "suffix" directory names,
859 * 3) inserted dir name (optional),
862 * These sections must satisfy the following requirements:
863 * * Parts 1 & 2 together form an initial portion of the dir name.
864 * * Part 3 is specified by the caller. (It should not contain a leading
866 * * Part 4 forms an initial portion of the base filename.
867 * * The filename must be <= 99 chars to fit the ustar 'name' field.
868 * * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld.
869 * * Part 1 must be <= 155 chars to fit the ustar 'prefix' field.
870 * * If the original name ends in a '/', the new name must also end in a '/'
871 * * Trailing '/.' sequences may be stripped.
873 * Note: Recall that the ustar format does not store the '/' separating
874 * parts 1 & 2, but does store the '/' separating parts 2 & 3.
/*
 * Build a name for the ustar header out of 'src' (of which
 * 'src_length' bytes are used), optionally inserting the directory
 * element 'insert' in front of the filename, and store it in 'dest'.
 *
 * Names shorter than 100 bytes with no insert are copied as they are.
 * Otherwise trailing '/' and '/.' are trimmed (remembering whether a
 * trailing '/' must be restored), the filename is located and cut so
 * that filename plus insert fit in 99 characters, a directory
 * "prefix" of at most 155 characters and a "suffix" that fits in the
 * remaining budget are located, and the pieces are copied to 'dest'
 * with strncpy().
 *
 * NOTE(review): 'dest' is written without a size argument; the
 * callers visible in this file pass 256-byte buffers.
 */
877 build_ustar_entry_name(char *dest
, const char *src
, size_t src_length
,
880 const char *prefix
, *prefix_end
;
881 const char *suffix
, *suffix_end
;
882 const char *filename
, *filename_end
;
884 int need_slash
= 0; /* Was there a trailing slash? */
885 size_t suffix_length
= 99;
888 /* Length of additional dir element to be added. */
892 /* +2 here allows for '/' before and after the insert. */
893 insert_length
= strlen(insert
) + 2;
895 /* Step 0: Quick bailout in a common case. */
896 if (src_length
< 100 && insert
== NULL
) {
897 strncpy(dest
, src
, src_length
);
898 dest
[src_length
] = '\0';
902 /* Step 1: Locate filename and enforce the length restriction. */
903 filename_end
= src
+ src_length
;
904 /* Remove trailing '/' chars and '/.' pairs. */
906 if (filename_end
> src
&& filename_end
[-1] == '/') {
908 need_slash
= 1; /* Remember to restore trailing '/'. */
911 if (filename_end
> src
+ 1 && filename_end
[-1] == '.'
912 && filename_end
[-2] == '/') {
914 need_slash
= 1; /* "foo/." will become "foo/" */
921 /* Find start of filename. */
922 filename
= filename_end
- 1;
923 while ((filename
> src
) && (*filename
!= '/'))
925 if ((*filename
== '/') && (filename
< filename_end
- 1))
927 /* Adjust filename_end so that filename + insert fits in 99 chars. */
928 suffix_length
-= insert_length
;
929 if (filename_end
> filename
+ suffix_length
)
930 filename_end
= filename
+ suffix_length
;
931 /* Calculate max size for "suffix" section (#3 above). */
932 suffix_length
-= filename_end
- filename
;
934 /* Step 2: Locate the "prefix" section of the dirname, including
937 prefix_end
= prefix
+ 155;
938 if (prefix_end
> filename
)
939 prefix_end
= filename
;
940 while (prefix_end
> prefix
&& *prefix_end
!= '/')
942 if ((prefix_end
< filename
) && (*prefix_end
== '/'))
945 /* Step 3: Locate the "suffix" section of the dirname,
946 * including trailing '/'. */
948 suffix_end
= suffix
+ suffix_length
; /* Enforce limit. */
949 if (suffix_end
> filename
)
950 suffix_end
= filename
;
951 if (suffix_end
< suffix
)
953 while (suffix_end
> suffix
&& *suffix_end
!= '/')
955 if ((suffix_end
< filename
) && (*suffix_end
== '/'))
958 /* Step 4: Build the new name. */
959 /* The OpenBSD strlcpy function is safer, but less portable. */
960 /* Rather than maintain two versions, just use the strncpy version. */
962 if (prefix_end
> prefix
) {
963 strncpy(p
, prefix
, prefix_end
- prefix
);
964 p
+= prefix_end
- prefix
;
966 if (suffix_end
> suffix
) {
967 strncpy(p
, suffix
, suffix_end
- suffix
);
968 p
+= suffix_end
- suffix
;
970 if (insert
!= NULL
) {
971 /* Note: assume insert does not have leading or trailing '/' */
976 strncpy(p
, filename
, filename_end
- filename
);
977 p
+= filename_end
- filename
;
986 * The ustar header for the pax extended attributes must have a
987 * reasonable name: SUSv3 suggests 'dirname'/PaxHeader/'filename'
989 * Joerg Schilling has argued that this is unnecessary because, in practice,
990 * if the pax extended attributes get extracted as regular files, no one is
991 * going to bother reading those attributes to manually restore them.
992 * Based on this, 'star' uses /tmp/PaxHeader/'basename' as the ustar header
993 * name. This is a tempting argument, but I'm not entirely convinced.
994 * I'm also uncomfortable with the fact that "/tmp" is a Unix-ism.
996 * The following routine implements the SUSv3 recommendation, and is
997 * much simpler because build_ustar_entry_name() above already does
998 * most of the work (we just need to give it an extra path element to
999 * insert and handle a few pathological cases).
/*
 * Build the ustar header name for the pax extended-attribute entry
 * that accompanies 'src', storing it in 'dest'.
 *
 * Special cases handled before the general one:
 *   - NULL or empty 'src'            -> "PaxHeader/blank"
 *   - nothing left after pruning
 *     trailing '/' and '/.' elements -> "/PaxHeader/rootdir"
 *   - a bare "."                     -> "PaxHeader/currentdir"
 * In the general case build_ustar_entry_name() is called with the
 * pruned length and "PaxHeader" as the element to insert.
 */
1002 build_pax_attribute_name(char *dest
, const char *src
)
1006 /* Handle the null filename case. */
1007 if (src
== NULL
|| *src
== '\0') {
1008 strcpy(dest
, "PaxHeader/blank");
1012 /* Prune final '/' and other unwanted final elements. */
1013 p
= src
+ strlen(src
);
1015 /* Ends in "/", remove the '/' */
1016 if (p
> src
&& p
[-1] == '/') {
1020 /* Ends in "/.", remove the '.' */
1021 if (p
> src
+ 1 && p
[-1] == '.'
1029 /* Pathological case: After above, there was nothing left.
1030 * This includes "/." "/./." "/.//./." etc. */
1032 strcpy(dest
, "/PaxHeader/rootdir");
1036 /* Convert unadorned "." into a suitable filename. */
1037 if (*src
== '.' && p
== src
+ 1) {
1038 strcpy(dest
, "PaxHeader/currentdir");
1042 /* General case: build a ustar-compatible name adding "/PaxHeader/". */
1043 build_ustar_entry_name(dest
, src
, p
- src
, "PaxHeader");
1048 /* Write two null blocks for the end of archive */
1050 archive_write_pax_finish(struct archive_write
*a
)
1055 if (a
->compressor
.write
== NULL
)
1056 return (ARCHIVE_OK
);
1058 pax
= (struct pax
*)a
->format_data
;
1059 r
= write_nulls(a
, 512 * 2);
1064 archive_write_pax_destroy(struct archive_write
*a
)
1068 pax
= (struct pax
*)a
->format_data
;
1069 archive_string_free(&pax
->pax_header
);
1071 a
->format_data
= NULL
;
1072 return (ARCHIVE_OK
);
/*
 * Finish the current entry: write NUL bytes for whatever part of the
 * body the client did not supply (entry_bytes_remaining) plus the
 * block padding (entry_padding) through write_nulls(), then reset both
 * counters to zero.
 *
 * NOTE(review): both counters are uint64_t while write_nulls() takes a
 * size_t, so the sum may be narrowed where size_t is 32 bits.
 */
1076 archive_write_pax_finish_entry(struct archive_write
*a
)
1081 pax
= (struct pax
*)a
->format_data
;
1082 ret
= write_nulls(a
, pax
->entry_bytes_remaining
+ pax
->entry_padding
);
1083 pax
->entry_bytes_remaining
= pax
->entry_padding
= 0;
/*
 * Write 'padding' NUL bytes to the archive.
 *
 * The bytes come from the a->nulls buffer and are pushed through
 * a->compressor.write in chunks of at most a->null_length bytes.  The
 * status of every write is compared against ARCHIVE_OK; once all bytes
 * have been written ARCHIVE_OK is returned.
 */
1088 write_nulls(struct archive_write
*a
, size_t padding
)
1092 while (padding
> 0) {
1093 to_write
= padding
< a
->null_length
? padding
: a
->null_length
;
1094 ret
= (a
->compressor
.write
)(a
, a
->nulls
, to_write
);
1095 if (ret
!= ARCHIVE_OK
)
1097 padding
-= to_write
;
1099 return (ARCHIVE_OK
);
/*
 * Write body data for the current entry.
 *
 * The request is clamped to the number of bytes still expected for the
 * entry (entry_bytes_remaining), the clamped amount is handed to
 * a->compressor.write, and entry_bytes_remaining is reduced by that
 * amount.  Note that the counter is reduced before the write status is
 * examined, i.e. regardless of whether the write succeeded.
 */
1103 archive_write_pax_data(struct archive_write
*a
, const void *buff
, size_t s
)
1108 pax
= (struct pax
*)a
->format_data
;
1109 if (s
> pax
->entry_bytes_remaining
)
1110 s
= pax
->entry_bytes_remaining
;
1112 ret
= (a
->compressor
.write
)(a
, buff
, s
);
1113 pax
->entry_bytes_remaining
-= s
;
1114 if (ret
== ARCHIVE_OK
)
/*
 * Report whether a wide-character string contains anything outside
 * the 7-bit ASCII range.
 *
 * Returns 1 if some character has a value of 128 or more, or a
 * negative value on platforms where wchar_t is signed; returns 0
 * otherwise.  A NULL pointer is treated as a string with no characters
 * and yields 0, so callers never hand a NULL string on for encoding.
 */
static int
has_non_ASCII(const wchar_t *wp)
{
	if (wp == NULL)
		return (0);

	for (; *wp != L'\0'; wp++) {
		/*
		 * Compare as unsigned so that negative values of a
		 * signed wchar_t are not mistaken for ASCII.
		 */
		if ((unsigned long)*wp >= 128)
			return (1);
	}
	return (0);
}
1129 * Used by extended attribute support; encodes the name
1130 * so that there will be no '=' characters in the result.
/*
 * URL-encode a string so that the result contains only printable
 * ASCII and no '=' characters.
 *
 * Every byte below 33 or above 126, as well as '%' and '=', is
 * replaced by '%' followed by two upper-case hexadecimal digits; all
 * other bytes are copied unchanged.
 *
 * Returns a NUL-terminated string allocated with malloc() that the
 * caller must free(), or NULL if 'in' is NULL or memory is exhausted.
 */
static char *
url_encode(const char *in)
{
	static const char hex[] = "0123456789ABCDEF";
	const unsigned char *s;
	char *out, *d;
	size_t out_len = 0;

	if (in == NULL)
		return (NULL);

	/*
	 * Bytes are examined as unsigned char so the result does not
	 * depend on whether plain char is signed, nor on how negative
	 * values are shifted.
	 */
	for (s = (const unsigned char *)in; *s != '\0'; s++) {
		if (*s < 33 || *s > 126 || *s == '%' || *s == '=')
			out_len += 3;
		else
			out_len++;
	}

	out = (char *)malloc(out_len + 1);
	if (out == NULL)
		return (NULL);

	for (s = (const unsigned char *)in, d = out; *s != '\0'; s++) {
		/* encode any non-printable ASCII character or '%' or '=' */
		if (*s < 33 || *s > 126 || *s == '%' || *s == '=') {
			/* URL encoding is '%' followed by two hex digits */
			*d++ = '%';
			*d++ = hex[(*s >> 4) & 0x0f];
			*d++ = hex[*s & 0x0f];
		} else {
			*d++ = (char)*s;
		}
	}
	*d = '\0';
	return (out);
}
1167 * Encode a sequence of bytes into a C string using base-64 encoding.
1169 * Returns a null-terminated C string allocated with malloc(); caller
1170 * is responsible for freeing the result.
/*
 * Encode 'len' bytes starting at 's' as base-64 text.
 *
 * Every full group of 3 input bytes becomes 4 output characters; a
 * final group of 1 byte becomes 2 characters and a final group of 2
 * bytes becomes 3 characters.  No '=' padding is appended.
 *
 * Returns a NUL-terminated string allocated with malloc() that the
 * caller must free(), or NULL if the output size cannot be represented
 * or memory is exhausted.
 */
static char *
base64_encode(const char *s, size_t len)
{
	static const char digits[64] =
	    { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
	      'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d',
	      'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s',
	      't','u','v','w','x','y','z','0','1','2','3','4','5','6','7',
	      '8','9','+','/' };
	/* Read the input as unsigned bytes so no negative value is shifted. */
	const unsigned char *in = (const unsigned char *)s;
	unsigned long v;
	char *d, *out;

	/* Refuse lengths for which the size computation below would wrap. */
	if (len > ((size_t)-1 - 3) / 4)
		return (NULL);

	/* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
	out = (char *)malloc((len * 4 + 2) / 3 + 1);
	if (out == NULL)
		return (NULL);
	d = out;

	/* Convert each group of 3 bytes into 4 characters. */
	while (len >= 3) {
		v = ((unsigned long)in[0] << 16)
		    | ((unsigned long)in[1] << 8)
		    | (unsigned long)in[2];
		in += 3;
		len -= 3;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		*d++ = digits[(v) & 0x3f];
	}
	/* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
	switch (len) {
	case 0:
		break;
	case 1:
		v = (unsigned long)in[0] << 16;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		break;
	case 2:
		v = ((unsigned long)in[0] << 16)
		    | ((unsigned long)in[1] << 8);
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		break;
	}
	/* Add trailing NUL character so output is a valid C string. */
	*d = '\0';
	return (out);
}