2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_ar.c,v 1.9 2008/03/12 21:10:26 kaiw Exp $");
31 #ifdef HAVE_SYS_STAT_H
45 #include "archive_entry.h"
46 #include "archive_private.h"
47 #include "archive_read_private.h"
50 off_t entry_bytes_remaining
;
58 * Define structure of the "ar" header.
/*
 * Byte layout of one "ar" member header.  The fields are stored back to
 * back (each offset equals the previous offset plus the previous size)
 * and together span 60 bytes, which is the amount the header reader
 * requests for every entry.
 *
 * The uid and gid widths are referenced by the common-header parser but
 * were not defined alongside the other fields; both are 6 bytes, as
 * implied by the neighbouring offsets (34 - 28 and 40 - 34).
 */
#define AR_name_offset 0	/* member name, or a "/..." / "#1/..." marker */
#define AR_name_size 16
#define AR_date_offset 16	/* modification time, decimal */
#define AR_date_size 12
#define AR_uid_offset 28	/* owner id, decimal */
#define AR_uid_size 6
#define AR_gid_offset 34	/* group id, decimal */
#define AR_gid_size 6
#define AR_mode_offset 40	/* file mode, octal */
#define AR_mode_size 8
#define AR_size_offset 48	/* member size in bytes, decimal */
#define AR_size_size 10
#define AR_fmag_offset 58	/* header terminator, "`\n" */
#define AR_fmag_size 2
/*
 * Locale-independent test for an ASCII decimal digit.
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * operand: without the outer parentheses, `!isdigit(c)` negates only `c`
 * and `a == isdigit(c)` binds to the first comparison alone.
 *
 * Note: the argument is evaluated twice; do not pass an expression with
 * side effects.
 */
#define isdigit(x) ((x) >= '0' && (x) <= '9')
/*
 * Format callbacks, listed in the order they are handed to
 * __archive_read_register_format().
 */
static int
archive_read_format_ar_bid(struct archive_read *a);

static int
archive_read_format_ar_read_header(struct archive_read *a,
    struct archive_entry *e);

static int
archive_read_format_ar_read_data(struct archive_read *a,
    const void **buff, size_t *size, off_t *offset);

static int
archive_read_format_ar_skip(struct archive_read *a);

static int
archive_read_format_ar_cleanup(struct archive_read *a);

/*
 * Internal helpers: fixed-width numeric field parsers (octal and
 * decimal) and the GNU long-filename table parser.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt);

static uint64_t
ar_atol10(const char *p, unsigned char_cnt);

static int
ar_parse_gnu_filename_table(struct archive_read *a);
87 static int ar_parse_common_header(struct ar
*ar
, struct archive_entry
*,
91 archive_read_support_format_ar(struct archive
*_a
)
93 struct archive_read
*a
= (struct archive_read
*)_a
;
97 ar
= (struct ar
*)malloc(sizeof(*ar
));
99 archive_set_error(&a
->archive
, ENOMEM
,
100 "Can't allocate ar data");
101 return (ARCHIVE_FATAL
);
103 memset(ar
, 0, sizeof(*ar
));
106 r
= __archive_read_register_format(a
,
108 archive_read_format_ar_bid
,
109 archive_read_format_ar_read_header
,
110 archive_read_format_ar_read_data
,
111 archive_read_format_ar_skip
,
112 archive_read_format_ar_cleanup
);
114 if (r
!= ARCHIVE_OK
) {
122 archive_read_format_ar_cleanup(struct archive_read
*a
)
126 ar
= (struct ar
*)(a
->format
->data
);
130 (a
->format
->data
) = NULL
;
135 archive_read_format_ar_bid(struct archive_read
*a
)
141 if (a
->archive
.archive_format
!= 0 &&
142 (a
->archive
.archive_format
& ARCHIVE_FORMAT_BASE_MASK
) !=
146 ar
= (struct ar
*)(a
->format
->data
);
149 * Verify the 8-byte file signature.
150 * TODO: Do we need to check more than this?
152 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, 8);
155 if (strncmp((const char*)h
, "!<arch>\n", 8) == 0) {
162 archive_read_format_ar_read_header(struct archive_read
*a
,
163 struct archive_entry
*entry
)
165 char filename
[AR_name_size
+ 1];
167 uint64_t number
; /* Used to hold parsed numbers before validation. */
169 size_t bsd_name_length
, entry_size
, s
;
175 ar
= (struct ar
*)(a
->format
->data
);
177 if (a
->archive
.file_position
== 0) {
179 * We are now at the beginning of the archive,
180 * so we first need to consume the ar global header.
182 (a
->decompressor
->consume
)(a
, 8);
183 /* Set a default format code for now. */
184 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR
;
187 /* Read the header for the next file entry. */
188 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &b
, 60);
189 if (bytes_read
< 60) {
191 return (ARCHIVE_EOF
);
193 (a
->decompressor
->consume
)(a
, 60);
196 /* Verify the magic signature on the file header. */
197 if (strncmp(h
+ AR_fmag_offset
, "`\n", 2) != 0) {
198 archive_set_error(&a
->archive
, EINVAL
,
199 "Consistency check failed");
200 return (ARCHIVE_WARN
);
203 /* Copy filename into work buffer. */
204 strncpy(filename
, h
+ AR_name_offset
, AR_name_size
);
205 filename
[AR_name_size
] = '\0';
208 * Guess the format variant based on the filename.
210 if (a
->archive
.archive_format
== ARCHIVE_FORMAT_AR
) {
211 /* We don't already know the variant, so let's guess. */
213 * Biggest clue is presence of '/': GNU starts special
214 * filenames with '/', appends '/' as terminator to
215 * non-special names, so anything with '/' should be
216 * GNU except for BSD long filenames.
218 if (strncmp(filename
, "#1/", 3) == 0)
219 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR_BSD
;
220 else if (strchr(filename
, '/') != NULL
)
221 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR_GNU
;
222 else if (strncmp(filename
, "__.SYMDEF", 9) == 0)
223 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR_BSD
;
225 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
226 * if name exactly fills 16-byte field? If so, we
227 * can't assume entries without '/' are BSD. XXX
231 /* Update format name from the code. */
232 if (a
->archive
.archive_format
== ARCHIVE_FORMAT_AR_GNU
)
233 a
->archive
.archive_format_name
= "ar (GNU/SVR4)";
234 else if (a
->archive
.archive_format
== ARCHIVE_FORMAT_AR_BSD
)
235 a
->archive
.archive_format_name
= "ar (BSD)";
237 a
->archive
.archive_format_name
= "ar";
240 * Remove trailing spaces from the filename. GNU and BSD
241 * variants both pad filename area out with spaces.
242 * This will only be wrong if GNU/SVR4 'ar' implementations
243 * omit trailing '/' for 16-char filenames and we have
244 * a 16-char filename that ends in ' '.
246 p
= filename
+ AR_name_size
- 1;
247 while (p
>= filename
&& *p
== ' ') {
253 * Remove trailing slash unless first character is '/'.
254 * (BSD entries never end in '/', so this will only trim
255 * GNU-format entries. GNU special entries start with '/'
256 * and are not terminated in '/', so we don't trim anything
257 * that starts with '/'.)
259 if (filename
[0] != '/' && *p
== '/')
263 * '//' is the GNU filename table.
264 * Later entries can refer to names in this table.
266 if (strcmp(filename
, "//") == 0) {
267 /* This must come before any call to _read_ahead. */
268 ar_parse_common_header(ar
, entry
, h
);
269 archive_entry_copy_pathname(entry
, filename
);
270 archive_entry_set_filetype(entry
, AE_IFREG
);
271 /* Get the size of the filename table. */
272 number
= ar_atol10(h
+ AR_size_offset
, AR_size_size
);
273 if (number
> SIZE_MAX
) {
274 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
275 "Filename table too large");
276 return (ARCHIVE_FATAL
);
278 entry_size
= (size_t)number
;
279 if (entry_size
== 0) {
280 archive_set_error(&a
->archive
, EINVAL
,
281 "Invalid string table");
282 return (ARCHIVE_WARN
);
284 if (ar
->strtab
!= NULL
) {
285 archive_set_error(&a
->archive
, EINVAL
,
286 "More than one string tables exist");
287 return (ARCHIVE_WARN
);
290 /* Read the filename table into memory. */
291 st
= malloc(entry_size
);
293 archive_set_error(&a
->archive
, ENOMEM
,
294 "Can't allocate filename table buffer");
295 return (ARCHIVE_FATAL
);
298 ar
->strtab_size
= entry_size
;
299 for (s
= entry_size
; s
> 0; s
-= bytes_read
) {
300 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &b
, s
);
302 return (ARCHIVE_FATAL
);
303 if (bytes_read
> (ssize_t
)s
)
305 memcpy(st
, b
, bytes_read
);
307 (a
->decompressor
->consume
)(a
, bytes_read
);
309 /* All contents are consumed. */
310 ar
->entry_bytes_remaining
= 0;
311 archive_entry_set_size(entry
, ar
->entry_bytes_remaining
);
313 /* Parse the filename table. */
314 return (ar_parse_gnu_filename_table(a
));
318 * GNU variant handles long filenames by storing /<number>
319 * to indicate a name stored in the filename table.
321 if (filename
[0] == '/' && isdigit(filename
[1])) {
322 number
= ar_atol10(h
+ AR_name_offset
+ 1, AR_name_size
- 1);
324 * If we can't look up the real name, warn and return
325 * the entry with the wrong name.
327 if (ar
->strtab
== NULL
|| number
> ar
->strtab_size
) {
328 archive_set_error(&a
->archive
, EINVAL
,
329 "Can't find long filename for entry");
330 archive_entry_copy_pathname(entry
, filename
);
331 /* Parse the time, owner, mode, size fields. */
332 ar_parse_common_header(ar
, entry
, h
);
333 return (ARCHIVE_WARN
);
336 archive_entry_copy_pathname(entry
, &ar
->strtab
[(size_t)number
]);
337 /* Parse the time, owner, mode, size fields. */
338 return (ar_parse_common_header(ar
, entry
, h
));
342 * BSD handles long filenames by storing "#1/" followed by the
343 * length of filename as a decimal number, then prepends the
344 * filename to the file contents.
346 if (strncmp(filename
, "#1/", 3) == 0) {
347 /* Parse the time, owner, mode, size fields. */
348 /* This must occur before _read_ahead is called again. */
349 ar_parse_common_header(ar
, entry
, h
);
351 /* Parse the size of the name, adjust the file size. */
352 number
= ar_atol10(h
+ AR_name_offset
+ 3, AR_name_size
- 3);
353 bsd_name_length
= (size_t)number
;
354 /* Guard against the filename + trailing NUL
355 * overflowing a size_t and against the filename size
356 * being larger than the entire entry. */
357 if (number
> (uint64_t)(bsd_name_length
+ 1)
358 || (off_t
)bsd_name_length
> ar
->entry_bytes_remaining
) {
359 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
360 "Bad input file size");
361 return (ARCHIVE_FATAL
);
363 ar
->entry_bytes_remaining
-= bsd_name_length
;
364 /* Adjust file size reported to client. */
365 archive_entry_set_size(entry
, ar
->entry_bytes_remaining
);
367 /* Read the long name into memory. */
368 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &b
, bsd_name_length
);
370 return (ARCHIVE_FATAL
);
371 if ((size_t)bytes_read
< bsd_name_length
) {
372 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
373 "Truncated input file");
374 return (ARCHIVE_FATAL
);
376 (a
->decompressor
->consume
)(a
, bsd_name_length
);
378 /* Store it in the entry. */
379 p
= (char *)malloc(bsd_name_length
+ 1);
381 archive_set_error(&a
->archive
, ENOMEM
,
382 "Can't allocate fname buffer");
383 return (ARCHIVE_FATAL
);
385 strncpy(p
, b
, bsd_name_length
);
386 p
[bsd_name_length
] = '\0';
387 archive_entry_copy_pathname(entry
, p
);
393 * "/" is the SVR4/GNU archive symbol table.
395 if (strcmp(filename
, "/") == 0) {
396 archive_entry_copy_pathname(entry
, "/");
397 /* Parse the time, owner, mode, size fields. */
398 r
= ar_parse_common_header(ar
, entry
, h
);
399 /* Force the file type to a regular file. */
400 archive_entry_set_filetype(entry
, AE_IFREG
);
405 * "__.SYMDEF" is a BSD archive symbol table.
407 if (strcmp(filename
, "__.SYMDEF") == 0) {
408 archive_entry_copy_pathname(entry
, filename
);
409 /* Parse the time, owner, mode, size fields. */
410 return (ar_parse_common_header(ar
, entry
, h
));
414 * Otherwise, this is a standard entry. The filename
415 * has already been trimmed as much as possible, based
416 * on our current knowledge of the format.
418 archive_entry_copy_pathname(entry
, filename
);
419 return (ar_parse_common_header(ar
, entry
, h
));
423 ar_parse_common_header(struct ar
*ar
, struct archive_entry
*entry
,
428 /* Copy remaining header */
429 archive_entry_set_mtime(entry
,
430 (time_t)ar_atol10(h
+ AR_date_offset
, AR_date_size
), 0L);
431 archive_entry_set_uid(entry
,
432 (uid_t
)ar_atol10(h
+ AR_uid_offset
, AR_uid_size
));
433 archive_entry_set_gid(entry
,
434 (gid_t
)ar_atol10(h
+ AR_gid_offset
, AR_gid_size
));
435 archive_entry_set_mode(entry
,
436 (mode_t
)ar_atol8(h
+ AR_mode_offset
, AR_mode_size
));
437 n
= ar_atol10(h
+ AR_size_offset
, AR_size_size
);
439 ar
->entry_offset
= 0;
440 ar
->entry_padding
= n
% 2;
441 archive_entry_set_size(entry
, n
);
442 ar
->entry_bytes_remaining
= n
;
447 archive_read_format_ar_read_data(struct archive_read
*a
,
448 const void **buff
, size_t *size
, off_t
*offset
)
453 ar
= (struct ar
*)(a
->format
->data
);
455 if (ar
->entry_bytes_remaining
> 0) {
456 bytes_read
= (a
->decompressor
->read_ahead
)(a
, buff
, 1);
457 if (bytes_read
== 0) {
458 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
459 "Truncated ar archive");
460 return (ARCHIVE_FATAL
);
463 return (ARCHIVE_FATAL
);
464 if (bytes_read
> ar
->entry_bytes_remaining
)
465 bytes_read
= (ssize_t
)ar
->entry_bytes_remaining
;
467 *offset
= ar
->entry_offset
;
468 ar
->entry_offset
+= bytes_read
;
469 ar
->entry_bytes_remaining
-= bytes_read
;
470 (a
->decompressor
->consume
)(a
, (size_t)bytes_read
);
473 while (ar
->entry_padding
> 0) {
474 bytes_read
= (a
->decompressor
->read_ahead
)(a
, buff
, 1);
476 return (ARCHIVE_FATAL
);
477 if (bytes_read
> ar
->entry_padding
)
478 bytes_read
= (ssize_t
)ar
->entry_padding
;
479 (a
->decompressor
->consume
)(a
, (size_t)bytes_read
);
480 ar
->entry_padding
-= bytes_read
;
484 *offset
= ar
->entry_offset
;
485 return (ARCHIVE_EOF
);
490 archive_read_format_ar_skip(struct archive_read
*a
)
495 const void *b
; /* Dummy variables */
499 ar
= (struct ar
*)(a
->format
->data
);
500 if (a
->decompressor
->skip
== NULL
) {
501 while (r
== ARCHIVE_OK
)
502 r
= archive_read_format_ar_read_data(a
, &b
, &s
, &o
);
506 bytes_skipped
= (a
->decompressor
->skip
)(a
, ar
->entry_bytes_remaining
+
508 if (bytes_skipped
< 0)
509 return (ARCHIVE_FATAL
);
511 ar
->entry_bytes_remaining
= 0;
512 ar
->entry_padding
= 0;
518 ar_parse_gnu_filename_table(struct archive_read
*a
)
524 ar
= (struct ar
*)(a
->format
->data
);
525 size
= ar
->strtab_size
;
527 for (p
= ar
->strtab
; p
< ar
->strtab
+ size
- 1; ++p
) {
531 goto bad_string_table
;
536 * Sanity check, last two chars must be `/\n' or '\n\n',
537 * depending on whether the string table is padded by a '\n'
538 * (a string table produced by GNU ar always has an even size).
540 if (p
!= ar
->strtab
+ size
&& *p
!= '\n')
541 goto bad_string_table
;
543 /* Enforce zero termination. */
544 ar
->strtab
[size
- 1] = '\0';
549 archive_set_error(&a
->archive
, EINVAL
,
550 "Invalid string table");
553 return (ARCHIVE_WARN
);
557 ar_atol8(const char *p
, unsigned char_cnt
)
559 uint64_t l
, limit
, last_digit_limit
;
560 unsigned int digit
, base
;
563 limit
= UINT64_MAX
/ base
;
564 last_digit_limit
= UINT64_MAX
% base
;
566 while ((*p
== ' ' || *p
== '\t') && char_cnt
-- > 0)
571 while (*p
>= '0' && digit
< base
&& char_cnt
-- > 0) {
572 if (l
>limit
|| (l
== limit
&& digit
> last_digit_limit
)) {
573 l
= UINT64_MAX
; /* Truncate on overflow. */
576 l
= (l
* base
) + digit
;
583 ar_atol10(const char *p
, unsigned char_cnt
)
585 uint64_t l
, limit
, last_digit_limit
;
586 unsigned int base
, digit
;
589 limit
= UINT64_MAX
/ base
;
590 last_digit_limit
= UINT64_MAX
% base
;
592 while ((*p
== ' ' || *p
== '\t') && char_cnt
-- > 0)
596 while (*p
>= '0' && digit
< base
&& char_cnt
-- > 0) {
597 if (l
> limit
|| (l
== limit
&& digit
> last_digit_limit
)) {
598 l
= UINT64_MAX
; /* Truncate on overflow. */
601 l
= (l
* base
) + digit
;