2 * libdpkg - Debian packaging suite library routines
3 * tarfn.c - tar archive extraction functions
5 * Copyright © 1995 Bruce Perens
6 * Copyright © 2007-2011, 2013-2017 Guillem Jover <guillem@debian.org>
8 * This is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <https://www.gnu.org/licenses/>.
25 #if HAVE_SYS_SYSMACROS_H
26 #include <sys/sysmacros.h>
39 #include <dpkg/macros.h>
40 #include <dpkg/dpkg.h>
41 #include <dpkg/i18n.h>
42 #include <dpkg/error.h>
43 #include <dpkg/tarfn.h>
45 #define TAR_MAGIC_USTAR "ustar\0" "00"
46 #define TAR_MAGIC_GNU "ustar " " \0"
48 #define TAR_TYPE_SIGNED(t) (!((t)0 < (t)-1))
50 #define TAR_TYPE_MIN(t) \
51 (TAR_TYPE_SIGNED(t) ? \
52 ~(t)TAR_TYPE_MAX(t) : \
54 #define TAR_TYPE_MAX(t) \
55 (TAR_TYPE_SIGNED(t) ? \
56 ((((t)1 << (sizeof(t) * 8 - 2)) - 1) * 2 + 1) : \
59 #define TAR_ATOUL(str, type) \
60 (type)tar_atoul(str, sizeof(str), TAR_TYPE_MAX(type))
61 #define TAR_ATOSL(str, type) \
62 (type)tar_atosl(str, sizeof(str), TAR_TYPE_MIN(type), TAR_TYPE_MAX(type))
75 /* Only valid on ustar and gnu. */
82 /* Only valid on ustar. */
86 static inline uintmax_t
87 tar_ret_errno(int err
, uintmax_t ret
)
94 * Convert an ASCII octal string to an intmax_t.
97 tar_atol8(const char *s
, size_t size
)
99 const char *end
= s
+ size
;
102 /* Old implementations might precede the value with spaces. */
103 while (s
< end
&& *s
== ' ')
107 return tar_ret_errno(EINVAL
, 0);
110 if (*s
== '\0' || *s
== ' ')
112 if (*s
< '0' || *s
> '7')
113 return tar_ret_errno(ERANGE
, 0);
114 n
= (n
* 010) + (*s
++ - '0');
118 if (*s
!= '\0' && *s
!= ' ')
119 return tar_ret_errno(EINVAL
, 0);
124 return tar_ret_errno(EINVAL
, 0);
126 return tar_ret_errno(0, n
);
130 * Convert a base-256 two-complement number to an intmax_t.
133 tar_atol256(const char *s
, size_t size
, intmax_t min
, uintmax_t max
)
139 /* The encoding always sets the first bit to one, so that it can be
140 * distinguished from the ASCII encoding. For positive numbers we
141 * need to reset it. For negative numbers we initialize n to -1. */
149 /* Check for overflows. */
150 while (size
> sizeof(uintmax_t)) {
152 return tar_ret_errno(ERANGE
, sign
? (uintmax_t)min
: max
);
157 if ((c
& 0x80) != (sign
& 0x80))
158 return tar_ret_errno(ERANGE
, sign
? (uintmax_t)min
: max
);
167 return tar_ret_errno(0, n
);
171 tar_atol(const char *s
, size_t size
, intmax_t min
, uintmax_t max
)
173 const unsigned char *a
= (const unsigned char *)s
;
175 /* Check if it is a long two-complement base-256 number, positive or
177 if (*a
== 0xff || *a
== 0x80)
178 return tar_atol256(s
, size
, min
, max
);
180 return tar_atol8(s
, size
);
184 tar_atoul(const char *s
, size_t size
, uintmax_t max
)
186 uintmax_t n
= tar_atol(s
, size
, 0, UINTMAX_MAX
);
189 return tar_ret_errno(ERANGE
, UINTMAX_MAX
);
195 tar_atosl(const char *s
, size_t size
, intmax_t min
, intmax_t max
)
197 intmax_t n
= tar_atol(s
, size
, INTMAX_MIN
, INTMAX_MAX
);
200 return tar_ret_errno(ERANGE
, INTMAX_MIN
);
202 return tar_ret_errno(ERANGE
, INTMAX_MAX
);
208 tar_header_get_prefix_name(struct tar_header
*h
)
210 return str_fmt("%.*s/%.*s", (int)sizeof(h
->prefix
), h
->prefix
,
211 (int)sizeof(h
->name
), h
->name
);
215 tar_header_get_unix_mode(struct tar_header
*h
)
218 enum tar_filetype type
;
220 type
= (enum tar_filetype
)h
->linkflag
;
223 case TAR_FILETYPE_FILE0
:
224 case TAR_FILETYPE_FILE
:
225 case TAR_FILETYPE_HARDLINK
:
228 case TAR_FILETYPE_SYMLINK
:
231 case TAR_FILETYPE_DIR
:
234 case TAR_FILETYPE_CHARDEV
:
237 case TAR_FILETYPE_BLOCKDEV
:
240 case TAR_FILETYPE_FIFO
:
248 mode
|= TAR_ATOUL(h
->mode
, mode_t
);
254 tar_header_checksum(struct tar_header
*h
)
256 unsigned char *s
= (unsigned char *)h
;
258 const size_t checksum_offset
= offsetof(struct tar_header
, checksum
);
261 /* Treat checksum field as all blank. */
262 sum
= ' ' * sizeof(h
->checksum
);
264 for (i
= checksum_offset
; i
> 0; i
--)
267 /* Skip the real checksum field. */
268 s
+= sizeof(h
->checksum
);
270 for (i
= TARBLKSZ
- checksum_offset
- sizeof(h
->checksum
); i
> 0; i
--)
277 tar_header_decode(struct tar_header
*h
, struct tar_entry
*d
, struct dpkg_error
*err
)
283 if (memcmp(h
->magic
, TAR_MAGIC_GNU
, 6) == 0)
284 d
->format
= TAR_FORMAT_GNU
;
285 else if (memcmp(h
->magic
, TAR_MAGIC_USTAR
, 6) == 0)
286 d
->format
= TAR_FORMAT_USTAR
;
288 d
->format
= TAR_FORMAT_OLD
;
290 d
->type
= (enum tar_filetype
)h
->linkflag
;
291 if (d
->type
== TAR_FILETYPE_FILE0
)
292 d
->type
= TAR_FILETYPE_FILE
;
294 /* Concatenate prefix and name to support ustar style long names. */
295 if (d
->format
== TAR_FORMAT_USTAR
&& h
->prefix
[0] != '\0')
296 d
->name
= tar_header_get_prefix_name(h
);
298 d
->name
= m_strndup(h
->name
, sizeof(h
->name
));
299 d
->linkname
= m_strndup(h
->linkname
, sizeof(h
->linkname
));
300 d
->stat
.mode
= tar_header_get_unix_mode(h
);
301 /* Even though off_t is signed, we use an unsigned parser here because
302 * negative offsets are not allowed. */
303 d
->size
= TAR_ATOUL(h
->size
, off_t
);
305 return dpkg_put_errno(err
, _("invalid tar header size field"));
306 d
->mtime
= TAR_ATOSL(h
->mtime
, time_t);
308 return dpkg_put_errno(err
, _("invalid tar header mtime field"));
310 if (d
->type
== TAR_FILETYPE_CHARDEV
|| d
->type
== TAR_FILETYPE_BLOCKDEV
)
311 d
->dev
= makedev(TAR_ATOUL(h
->devmajor
, dev_t
),
312 TAR_ATOUL(h
->devminor
, dev_t
));
314 d
->dev
= makedev(0, 0);
317 d
->stat
.uname
= m_strndup(h
->user
, sizeof(h
->user
));
319 d
->stat
.uname
= NULL
;
320 d
->stat
.uid
= TAR_ATOUL(h
->uid
, uid_t
);
322 return dpkg_put_errno(err
, _("invalid tar header uid field"));
325 d
->stat
.gname
= m_strndup(h
->group
, sizeof(h
->group
));
327 d
->stat
.gname
= NULL
;
328 d
->stat
.gid
= TAR_ATOUL(h
->gid
, gid_t
);
330 return dpkg_put_errno(err
, _("invalid tar header gid field"));
332 checksum
= tar_atol8(h
->checksum
, sizeof(h
->checksum
));
334 return dpkg_put_errno(err
, _("invalid tar header checksum field"));
336 if (tar_header_checksum(h
) != checksum
)
337 return dpkg_put_error(err
, _("invalid tar header checksum"));
343 * Decode a GNU longlink or longname from the tar archive.
345 * The way the GNU long{link,name} stuff works is like this:
347 * - The first header is a “dummy” header that contains the size of the
349 * - The next N headers contain the filename.
350 * - After the headers with the filename comes the “real” header with a
351 * bogus name or link.
354 tar_gnu_long(struct tar_archive
*tar
, struct tar_entry
*te
, char **longp
)
362 *longp
= bp
= m_malloc(te
->size
);
364 for (long_read
= te
->size
; long_read
> 0; long_read
-= TARBLKSZ
) {
367 status
= tar
->ops
->read(tar
, buf
, TARBLKSZ
);
368 if (status
== TARBLKSZ
)
371 /* Read partial header record? */
374 status
= dpkg_put_error(&tar
->err
,
375 _("partially read tar header"));
378 /* If we didn't get TARBLKSZ bytes read, punt. */
382 copysize
= min(long_read
, TARBLKSZ
);
383 memcpy(bp
, buf
, copysize
);
391 tar_entry_copy(struct tar_entry
*dst
, struct tar_entry
*src
)
393 memcpy(dst
, src
, sizeof(struct tar_entry
));
395 dst
->name
= m_strdup(src
->name
);
396 dst
->linkname
= m_strdup(src
->linkname
);
399 dst
->stat
.uname
= m_strdup(src
->stat
.uname
);
401 dst
->stat
.gname
= m_strdup(src
->stat
.gname
);
405 tar_entry_destroy(struct tar_entry
*te
)
409 free(te
->stat
.uname
);
410 free(te
->stat
.gname
);
412 memset(te
, 0, sizeof(*te
));
415 struct tar_symlink_entry
{
416 struct tar_symlink_entry
*next
;
421 * Update the tar entry from system information.
423 * Normalize UID and GID relative to the current system.
426 tar_entry_update_from_system(struct tar_entry
*te
)
428 struct passwd
*passwd
;
431 if (te
->stat
.uname
) {
432 passwd
= getpwnam(te
->stat
.uname
);
434 te
->stat
.uid
= passwd
->pw_uid
;
436 if (te
->stat
.gname
) {
437 group
= getgrnam(te
->stat
.gname
);
439 te
->stat
.gid
= group
->gr_gid
;
444 tar_extractor(struct tar_archive
*tar
)
447 char buffer
[TARBLKSZ
];
450 char *next_long_name
, *next_long_link
;
451 struct tar_symlink_entry
*symlink_head
, *symlink_tail
, *symlink_node
;
453 next_long_name
= NULL
;
454 next_long_link
= NULL
;
455 symlink_tail
= symlink_head
= NULL
;
462 while ((status
= tar
->ops
->read(tar
, buffer
, TARBLKSZ
)) == TARBLKSZ
) {
465 if (tar_header_decode((struct tar_header
*)buffer
, &h
, &tar
->err
) < 0) {
466 if (h
.name
[0] == '\0') {
467 /* The checksum failed on the terminating
468 * End Of Tape block entry of zeros. */
469 dpkg_error_destroy(&tar
->err
);
476 tar_entry_destroy(&h
);
479 if (h
.type
!= TAR_FILETYPE_GNU_LONGLINK
&&
480 h
.type
!= TAR_FILETYPE_GNU_LONGNAME
) {
481 if (next_long_name
) {
483 h
.name
= next_long_name
;
486 if (next_long_link
) {
488 h
.linkname
= next_long_link
;
491 next_long_link
= NULL
;
492 next_long_name
= NULL
;
495 if (h
.name
[0] == '\0') {
496 status
= dpkg_put_error(&tar
->err
,
497 _("invalid tar header with empty name field"));
499 tar_entry_destroy(&h
);
503 name_len
= strlen(h
.name
);
506 case TAR_FILETYPE_FILE
:
507 /* Compatibility with pre-ANSI ustar. */
508 if (h
.name
[name_len
- 1] != '/') {
509 status
= tar
->ops
->extract_file(tar
, &h
);
512 /* Else, fall through. */
513 case TAR_FILETYPE_DIR
:
514 if (h
.name
[name_len
- 1] == '/') {
515 h
.name
[name_len
- 1] = '\0';
517 status
= tar
->ops
->mkdir(tar
, &h
);
519 case TAR_FILETYPE_HARDLINK
:
520 status
= tar
->ops
->link(tar
, &h
);
522 case TAR_FILETYPE_SYMLINK
:
523 symlink_node
= m_malloc(sizeof(*symlink_node
));
524 symlink_node
->next
= NULL
;
525 tar_entry_copy(&symlink_node
->h
, &h
);
528 symlink_tail
->next
= symlink_node
;
530 symlink_head
= symlink_node
;
531 symlink_tail
= symlink_node
;
534 case TAR_FILETYPE_CHARDEV
:
535 case TAR_FILETYPE_BLOCKDEV
:
536 case TAR_FILETYPE_FIFO
:
537 status
= tar
->ops
->mknod(tar
, &h
);
539 case TAR_FILETYPE_GNU_LONGLINK
:
540 status
= tar_gnu_long(tar
, &h
, &next_long_link
);
542 case TAR_FILETYPE_GNU_LONGNAME
:
543 status
= tar_gnu_long(tar
, &h
, &next_long_name
);
545 case TAR_FILETYPE_GNU_VOLUME
:
546 case TAR_FILETYPE_GNU_MULTIVOL
:
547 case TAR_FILETYPE_GNU_SPARSE
:
548 case TAR_FILETYPE_GNU_DUMPDIR
:
549 status
= dpkg_put_error(&tar
->err
,
550 _("unsupported GNU tar header type '%c'"),
554 case TAR_FILETYPE_SOLARIS_EXTENDED
:
555 case TAR_FILETYPE_SOLARIS_ACL
:
556 status
= dpkg_put_error(&tar
->err
,
557 _("unsupported Solaris tar header type '%c'"),
561 case TAR_FILETYPE_PAX_GLOBAL
:
562 case TAR_FILETYPE_PAX_EXTENDED
:
563 status
= dpkg_put_error(&tar
->err
,
564 _("unsupported PAX tar header type '%c'"),
569 status
= dpkg_put_error(&tar
->err
,
570 _("unknown tar header type '%c'"),
574 tar_entry_destroy(&h
);
576 /* Pass on status from coroutine. */
580 while (symlink_head
) {
581 symlink_node
= symlink_head
->next
;
583 status
= tar
->ops
->symlink(tar
, &symlink_head
->h
);
584 tar_entry_destroy(&symlink_head
->h
);
586 symlink_head
= symlink_node
;
588 /* Make sure we free the long names, in case of a bogus or truncated
589 * tar archive with long entries not followed by a normal entry. */
590 free(next_long_name
);
591 free(next_long_link
);
594 status
= dpkg_put_error(&tar
->err
,
595 _("partially read tar header"));
599 /* Return whatever I/O function returned. */