Import libarchive-2.5.4b.
[dragonfly.git] / contrib / libarchive-2 / libarchive / archive_read_support_format_ar.c
blob15d7e7cfd725afe7f18fd55878e46f927e6bb2ea
1 /*-
2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_ar.c,v 1.9 2008/03/12 21:10:26 kaiw Exp $");
31 #ifdef HAVE_SYS_STAT_H
32 #include <sys/stat.h>
33 #endif
34 #ifdef HAVE_ERRNO_H
35 #include <errno.h>
36 #endif
37 #ifdef HAVE_STDLIB_H
38 #include <stdlib.h>
39 #endif
40 #ifdef HAVE_STRING_H
41 #include <string.h>
42 #endif
44 #include "archive.h"
45 #include "archive_entry.h"
46 #include "archive_private.h"
47 #include "archive_read_private.h"
/*
 * Per-archive reader state for the "ar" format.
 */
struct ar {
	/* Bytes of the current entry's body not yet handed to the client. */
	off_t	 entry_bytes_remaining;
	/* Offset within the current entry reported with the next data block. */
	off_t	 entry_offset;
	/* Pad bytes after the entry body (size % 2) still to be consumed. */
	off_t	 entry_padding;
	/* GNU filename table ("//" member) held in memory, or NULL. */
	char	*strtab;
	/* Size in bytes of the buffer that strtab points to. */
	size_t	 strtab_size;
};
/*
 * Define structure of the "ar" header.
 *
 * The header is a sequence of fixed-width fields; each field starts
 * where the previous one ends, for a total of 60 bytes.
 */
#define AR_name_offset 0
#define AR_name_size 16
#define AR_date_offset (AR_name_offset + AR_name_size)
#define AR_date_size 12
#define AR_uid_offset (AR_date_offset + AR_date_size)
#define AR_uid_size 6
#define AR_gid_offset (AR_uid_offset + AR_uid_size)
#define AR_gid_size 6
#define AR_mode_offset (AR_gid_offset + AR_gid_size)
#define AR_mode_size 8
#define AR_size_offset (AR_mode_offset + AR_mode_size)
#define AR_size_size 10
#define AR_fmag_offset (AR_size_offset + AR_size_size)
#define AR_fmag_size 2
/*
 * Locale-independent test for an ASCII decimal digit.
 * The whole expansion is parenthesized so the macro can be negated or
 * combined with other operators safely.  Note that the argument is
 * evaluated twice, so it must not have side effects.
 */
#define isdigit(x) ((x) >= '0' && (x) <= '9')
/* Format callbacks registered with the read core. */
static int	archive_read_format_ar_bid(struct archive_read *a);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *e);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);

/* Internal helpers. */
static int	ar_parse_common_header(struct ar *ar,
		    struct archive_entry *entry, const char *h);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
90 int
91 archive_read_support_format_ar(struct archive *_a)
93 struct archive_read *a = (struct archive_read *)_a;
94 struct ar *ar;
95 int r;
97 ar = (struct ar *)malloc(sizeof(*ar));
98 if (ar == NULL) {
99 archive_set_error(&a->archive, ENOMEM,
100 "Can't allocate ar data");
101 return (ARCHIVE_FATAL);
103 memset(ar, 0, sizeof(*ar));
104 ar->strtab = NULL;
106 r = __archive_read_register_format(a,
108 archive_read_format_ar_bid,
109 archive_read_format_ar_read_header,
110 archive_read_format_ar_read_data,
111 archive_read_format_ar_skip,
112 archive_read_format_ar_cleanup);
114 if (r != ARCHIVE_OK) {
115 free(ar);
116 return (r);
118 return (ARCHIVE_OK);
121 static int
122 archive_read_format_ar_cleanup(struct archive_read *a)
124 struct ar *ar;
126 ar = (struct ar *)(a->format->data);
127 if (ar->strtab)
128 free(ar->strtab);
129 free(ar);
130 (a->format->data) = NULL;
131 return (ARCHIVE_OK);
134 static int
135 archive_read_format_ar_bid(struct archive_read *a)
137 struct ar *ar;
138 ssize_t bytes_read;
139 const void *h;
141 if (a->archive.archive_format != 0 &&
142 (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
143 ARCHIVE_FORMAT_AR)
144 return(0);
146 ar = (struct ar *)(a->format->data);
149 * Verify the 8-byte file signature.
150 * TODO: Do we need to check more than this?
152 bytes_read = (a->decompressor->read_ahead)(a, &h, 8);
153 if (bytes_read < 8)
154 return (-1);
155 if (strncmp((const char*)h, "!<arch>\n", 8) == 0) {
156 return (64);
158 return (-1);
161 static int
162 archive_read_format_ar_read_header(struct archive_read *a,
163 struct archive_entry *entry)
165 char filename[AR_name_size + 1];
166 struct ar *ar;
167 uint64_t number; /* Used to hold parsed numbers before validation. */
168 ssize_t bytes_read;
169 size_t bsd_name_length, entry_size, s;
170 char *p, *st;
171 const void *b;
172 const char *h;
173 int r;
175 ar = (struct ar*)(a->format->data);
177 if (a->archive.file_position == 0) {
179 * We are now at the beginning of the archive,
180 * so we need first consume the ar global header.
182 (a->decompressor->consume)(a, 8);
183 /* Set a default format code for now. */
184 a->archive.archive_format = ARCHIVE_FORMAT_AR;
187 /* Read the header for the next file entry. */
188 bytes_read = (a->decompressor->read_ahead)(a, &b, 60);
189 if (bytes_read < 60) {
190 /* Broken header. */
191 return (ARCHIVE_EOF);
193 (a->decompressor->consume)(a, 60);
194 h = (const char *)b;
196 /* Verify the magic signature on the file header. */
197 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
198 archive_set_error(&a->archive, EINVAL,
199 "Consistency check failed");
200 return (ARCHIVE_WARN);
203 /* Copy filename into work buffer. */
204 strncpy(filename, h + AR_name_offset, AR_name_size);
205 filename[AR_name_size] = '\0';
208 * Guess the format variant based on the filename.
210 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
211 /* We don't already know the variant, so let's guess. */
213 * Biggest clue is presence of '/': GNU starts special
214 * filenames with '/', appends '/' as terminator to
215 * non-special names, so anything with '/' should be
216 * GNU except for BSD long filenames.
218 if (strncmp(filename, "#1/", 3) == 0)
219 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
220 else if (strchr(filename, '/') != NULL)
221 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
222 else if (strncmp(filename, "__.SYMDEF", 9) == 0)
223 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
225 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
226 * if name exactly fills 16-byte field? If so, we
227 * can't assume entries without '/' are BSD. XXX
231 /* Update format name from the code. */
232 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
233 a->archive.archive_format_name = "ar (GNU/SVR4)";
234 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
235 a->archive.archive_format_name = "ar (BSD)";
236 else
237 a->archive.archive_format_name = "ar";
240 * Remove trailing spaces from the filename. GNU and BSD
241 * variants both pad filename area out with spaces.
242 * This will only be wrong if GNU/SVR4 'ar' implementations
243 * omit trailing '/' for 16-char filenames and we have
244 * a 16-char filename that ends in ' '.
246 p = filename + AR_name_size - 1;
247 while (p >= filename && *p == ' ') {
248 *p = '\0';
249 p--;
253 * Remove trailing slash unless first character is '/'.
254 * (BSD entries never end in '/', so this will only trim
255 * GNU-format entries. GNU special entries start with '/'
256 * and are not terminated in '/', so we don't trim anything
257 * that starts with '/'.)
259 if (filename[0] != '/' && *p == '/')
260 *p = '\0';
263 * '//' is the GNU filename table.
264 * Later entries can refer to names in this table.
266 if (strcmp(filename, "//") == 0) {
267 /* This must come before any call to _read_ahead. */
268 ar_parse_common_header(ar, entry, h);
269 archive_entry_copy_pathname(entry, filename);
270 archive_entry_set_filetype(entry, AE_IFREG);
271 /* Get the size of the filename table. */
272 number = ar_atol10(h + AR_size_offset, AR_size_size);
273 if (number > SIZE_MAX) {
274 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
275 "Filename table too large");
276 return (ARCHIVE_FATAL);
278 entry_size = (size_t)number;
279 if (entry_size == 0) {
280 archive_set_error(&a->archive, EINVAL,
281 "Invalid string table");
282 return (ARCHIVE_WARN);
284 if (ar->strtab != NULL) {
285 archive_set_error(&a->archive, EINVAL,
286 "More than one string tables exist");
287 return (ARCHIVE_WARN);
290 /* Read the filename table into memory. */
291 st = malloc(entry_size);
292 if (st == NULL) {
293 archive_set_error(&a->archive, ENOMEM,
294 "Can't allocate filename table buffer");
295 return (ARCHIVE_FATAL);
297 ar->strtab = st;
298 ar->strtab_size = entry_size;
299 for (s = entry_size; s > 0; s -= bytes_read) {
300 bytes_read = (a->decompressor->read_ahead)(a, &b, s);
301 if (bytes_read <= 0)
302 return (ARCHIVE_FATAL);
303 if (bytes_read > (ssize_t)s)
304 bytes_read = s;
305 memcpy(st, b, bytes_read);
306 st += bytes_read;
307 (a->decompressor->consume)(a, bytes_read);
309 /* All contents are consumed. */
310 ar->entry_bytes_remaining = 0;
311 archive_entry_set_size(entry, ar->entry_bytes_remaining);
313 /* Parse the filename table. */
314 return (ar_parse_gnu_filename_table(a));
318 * GNU variant handles long filenames by storing /<number>
319 * to indicate a name stored in the filename table.
321 if (filename[0] == '/' && isdigit(filename[1])) {
322 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
324 * If we can't look up the real name, warn and return
325 * the entry with the wrong name.
327 if (ar->strtab == NULL || number > ar->strtab_size) {
328 archive_set_error(&a->archive, EINVAL,
329 "Can't find long filename for entry");
330 archive_entry_copy_pathname(entry, filename);
331 /* Parse the time, owner, mode, size fields. */
332 ar_parse_common_header(ar, entry, h);
333 return (ARCHIVE_WARN);
336 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
337 /* Parse the time, owner, mode, size fields. */
338 return (ar_parse_common_header(ar, entry, h));
342 * BSD handles long filenames by storing "#1/" followed by the
343 * length of filename as a decimal number, then prepends the
344 * the filename to the file contents.
346 if (strncmp(filename, "#1/", 3) == 0) {
347 /* Parse the time, owner, mode, size fields. */
348 /* This must occur before _read_ahead is called again. */
349 ar_parse_common_header(ar, entry, h);
351 /* Parse the size of the name, adjust the file size. */
352 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
353 bsd_name_length = (size_t)number;
354 /* Guard against the filename + trailing NUL
355 * overflowing a size_t and against the filename size
356 * being larger than the entire entry. */
357 if (number > (uint64_t)(bsd_name_length + 1)
358 || (off_t)bsd_name_length > ar->entry_bytes_remaining) {
359 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
360 "Bad input file size");
361 return (ARCHIVE_FATAL);
363 ar->entry_bytes_remaining -= bsd_name_length;
364 /* Adjust file size reported to client. */
365 archive_entry_set_size(entry, ar->entry_bytes_remaining);
367 /* Read the long name into memory. */
368 bytes_read = (a->decompressor->read_ahead)(a, &b, bsd_name_length);
369 if (bytes_read <= 0)
370 return (ARCHIVE_FATAL);
371 if ((size_t)bytes_read < bsd_name_length) {
372 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
373 "Truncated input file");
374 return (ARCHIVE_FATAL);
376 (a->decompressor->consume)(a, bsd_name_length);
378 /* Store it in the entry. */
379 p = (char *)malloc(bsd_name_length + 1);
380 if (p == NULL) {
381 archive_set_error(&a->archive, ENOMEM,
382 "Can't allocate fname buffer");
383 return (ARCHIVE_FATAL);
385 strncpy(p, b, bsd_name_length);
386 p[bsd_name_length] = '\0';
387 archive_entry_copy_pathname(entry, p);
388 free(p);
389 return (ARCHIVE_OK);
393 * "/" is the SVR4/GNU archive symbol table.
395 if (strcmp(filename, "/") == 0) {
396 archive_entry_copy_pathname(entry, "/");
397 /* Parse the time, owner, mode, size fields. */
398 r = ar_parse_common_header(ar, entry, h);
399 /* Force the file type to a regular file. */
400 archive_entry_set_filetype(entry, AE_IFREG);
401 return (r);
405 * "__.SYMDEF" is a BSD archive symbol table.
407 if (strcmp(filename, "__.SYMDEF") == 0) {
408 archive_entry_copy_pathname(entry, filename);
409 /* Parse the time, owner, mode, size fields. */
410 return (ar_parse_common_header(ar, entry, h));
414 * Otherwise, this is a standard entry. The filename
415 * has already been trimmed as much as possible, based
416 * on our current knowledge of the format.
418 archive_entry_copy_pathname(entry, filename);
419 return (ar_parse_common_header(ar, entry, h));
422 static int
423 ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
424 const char *h)
426 uint64_t n;
428 /* Copy remaining header */
429 archive_entry_set_mtime(entry,
430 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
431 archive_entry_set_uid(entry,
432 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
433 archive_entry_set_gid(entry,
434 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
435 archive_entry_set_mode(entry,
436 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
437 n = ar_atol10(h + AR_size_offset, AR_size_size);
439 ar->entry_offset = 0;
440 ar->entry_padding = n % 2;
441 archive_entry_set_size(entry, n);
442 ar->entry_bytes_remaining = n;
443 return (ARCHIVE_OK);
446 static int
447 archive_read_format_ar_read_data(struct archive_read *a,
448 const void **buff, size_t *size, off_t *offset)
450 ssize_t bytes_read;
451 struct ar *ar;
453 ar = (struct ar *)(a->format->data);
455 if (ar->entry_bytes_remaining > 0) {
456 bytes_read = (a->decompressor->read_ahead)(a, buff, 1);
457 if (bytes_read == 0) {
458 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
459 "Truncated ar archive");
460 return (ARCHIVE_FATAL);
462 if (bytes_read < 0)
463 return (ARCHIVE_FATAL);
464 if (bytes_read > ar->entry_bytes_remaining)
465 bytes_read = (ssize_t)ar->entry_bytes_remaining;
466 *size = bytes_read;
467 *offset = ar->entry_offset;
468 ar->entry_offset += bytes_read;
469 ar->entry_bytes_remaining -= bytes_read;
470 (a->decompressor->consume)(a, (size_t)bytes_read);
471 return (ARCHIVE_OK);
472 } else {
473 while (ar->entry_padding > 0) {
474 bytes_read = (a->decompressor->read_ahead)(a, buff, 1);
475 if (bytes_read <= 0)
476 return (ARCHIVE_FATAL);
477 if (bytes_read > ar->entry_padding)
478 bytes_read = (ssize_t)ar->entry_padding;
479 (a->decompressor->consume)(a, (size_t)bytes_read);
480 ar->entry_padding -= bytes_read;
482 *buff = NULL;
483 *size = 0;
484 *offset = ar->entry_offset;
485 return (ARCHIVE_EOF);
489 static int
490 archive_read_format_ar_skip(struct archive_read *a)
492 off_t bytes_skipped;
493 struct ar* ar;
494 int r = ARCHIVE_OK;
495 const void *b; /* Dummy variables */
496 size_t s;
497 off_t o;
499 ar = (struct ar *)(a->format->data);
500 if (a->decompressor->skip == NULL) {
501 while (r == ARCHIVE_OK)
502 r = archive_read_format_ar_read_data(a, &b, &s, &o);
503 return (r);
506 bytes_skipped = (a->decompressor->skip)(a, ar->entry_bytes_remaining +
507 ar->entry_padding);
508 if (bytes_skipped < 0)
509 return (ARCHIVE_FATAL);
511 ar->entry_bytes_remaining = 0;
512 ar->entry_padding = 0;
514 return (ARCHIVE_OK);
517 static int
518 ar_parse_gnu_filename_table(struct archive_read *a)
520 struct ar *ar;
521 char *p;
522 size_t size;
524 ar = (struct ar*)(a->format->data);
525 size = ar->strtab_size;
527 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
528 if (*p == '/') {
529 *p++ = '\0';
530 if (*p != '\n')
531 goto bad_string_table;
532 *p = '\0';
536 * Sanity check, last two chars must be `/\n' or '\n\n',
537 * depending on whether the string table is padded by a '\n'
538 * (string table produced by GNU ar always has a even size).
540 if (p != ar->strtab + size && *p != '\n')
541 goto bad_string_table;
543 /* Enforce zero termination. */
544 ar->strtab[size - 1] = '\0';
546 return (ARCHIVE_OK);
548 bad_string_table:
549 archive_set_error(&a->archive, EINVAL,
550 "Invalid string table");
551 free(ar->strtab);
552 ar->strtab = NULL;
553 return (ARCHIVE_WARN);
/*
 * Parse an unsigned octal number from a fixed-width, not necessarily
 * NUL-terminated field.
 *
 * p        - start of the field.
 * char_cnt - width of the field; no byte at or beyond p[char_cnt] is
 *            ever read.
 *
 * Leading spaces and tabs are skipped.  Parsing stops at the first
 * character that is not an octal digit or at the end of the field.
 * Returns the parsed value, 0 if there are no digits, or UINT64_MAX if
 * the value does not fit in 64 bits.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	/* Check the remaining width before touching the next byte. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0') {
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
/*
 * Parse an unsigned decimal number from a fixed-width, not necessarily
 * NUL-terminated field.
 *
 * p        - start of the field.
 * char_cnt - width of the field; no byte at or beyond p[char_cnt] is
 *            ever read.
 *
 * Leading spaces and tabs are skipped.  Parsing stops at the first
 * character that is not a decimal digit or at the end of the field.
 * Returns the parsed value, 0 if there are no digits, or UINT64_MAX if
 * the value does not fit in 64 bits.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	/* Check the remaining width before touching the next byte. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0') {
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}