Upgrade to libarchive 2.0.25 which gives us a nice speed boost along with
[dragonfly/port-amd64.git] / contrib / libarchive-1.3.1 / libarchive / archive_read.c
blob78475ad8cde6a609eeb3ab53b0c00bf4cbc649a7
1 /*-
2 * Copyright (c) 2003-2004 Tim Kientzle
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * This file contains the "essential" portions of the read API, that
29 * is, stuff that will probably always be used by any client that
30 * actually needs to read an archive. Optional pieces have been, as
31 * far as possible, separated out into separate files to avoid
32 * needlessly bloating statically-linked clients.
35 #include "archive_platform.h"
36 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read.c,v 1.22 2006/09/05 05:59:45 kientzle Exp $");
38 #include <errno.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
44 #include "archive.h"
45 #include "archive_entry.h"
46 #include "archive_private.h"
48 static int choose_decompressor(struct archive *, const void*, size_t);
49 static int choose_format(struct archive *);
52 * Allocate, initialize and return a struct archive object.
54 struct archive *
55 archive_read_new(void)
57 struct archive *a;
58 unsigned char *nulls;
60 a = malloc(sizeof(*a));
61 if (a == NULL) {
62 archive_set_error(a, ENOMEM, "Can't allocate archive object");
63 return (NULL);
65 memset(a, 0, sizeof(*a));
67 a->user_uid = geteuid();
68 a->magic = ARCHIVE_READ_MAGIC;
69 a->bytes_per_block = ARCHIVE_DEFAULT_BYTES_PER_BLOCK;
71 a->null_length = 1024;
72 nulls = malloc(a->null_length);
73 if (nulls == NULL) {
74 archive_set_error(a, ENOMEM, "Can't allocate archive object 'nulls' element");
75 free(a);
76 return (NULL);
78 memset(nulls, 0, a->null_length);
79 a->nulls = nulls;
81 a->state = ARCHIVE_STATE_NEW;
82 a->entry = archive_entry_new();
84 /* We always support uncompressed archives. */
85 archive_read_support_compression_none((struct archive*)a);
87 return (a);
91 * Record the do-not-extract-to file. This belongs in archive_read_extract.c.
93 void
94 archive_read_extract_set_skip_file(struct archive *a, dev_t d, ino_t i)
96 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_extract_set_skip_file");
97 a->skip_file_dev = d;
98 a->skip_file_ino = i;
103 * Open the archive
106 archive_read_open(struct archive *a, void *client_data,
107 archive_open_callback *client_opener, archive_read_callback *client_reader,
108 archive_close_callback *client_closer)
110 /* Old archive_read_open() is just a thin shell around
111 * archive_read_open2. */
112 return archive_read_open2(a, client_data, client_opener,
113 client_reader, NULL, client_closer);
117 archive_read_open2(struct archive *a, void *client_data,
118 archive_open_callback *client_opener,
119 archive_read_callback *client_reader,
120 archive_skip_callback *client_skipper,
121 archive_close_callback *client_closer)
123 const void *buffer;
124 ssize_t bytes_read;
125 int high_bidder;
126 int e;
128 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_open");
130 if (client_reader == NULL)
131 __archive_errx(1,
132 "No reader function provided to archive_read_open");
135 * Set these NULL initially. If the open or initial read fails,
136 * we'll leave them NULL to indicate that the file is invalid.
137 * (In particular, this helps ensure that the closer doesn't
138 * get called more than once.)
140 a->client_opener = NULL;
141 a->client_reader = NULL;
142 a->client_skipper = NULL;
143 a->client_closer = NULL;
144 a->client_data = NULL;
146 /* Open data source. */
147 if (client_opener != NULL) {
148 e =(client_opener)(a, client_data);
149 if (e != 0) {
150 /* If the open failed, call the closer to clean up. */
151 if (client_closer)
152 (client_closer)(a, client_data);
153 return (e);
157 /* Read first block now for format detection. */
158 bytes_read = (client_reader)(a, client_data, &buffer);
160 if (bytes_read < 0) {
161 /* If the first read fails, close before returning error. */
162 if (client_closer)
163 (client_closer)(a, client_data);
164 /* client_reader should have already set error information. */
165 return (ARCHIVE_FATAL);
168 /* An empty archive is a serious error. */
169 if (bytes_read == 0) {
170 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
171 "Empty input file");
172 /* Close the empty file. */
173 if (client_closer)
174 (client_closer)(a, client_data);
175 return (ARCHIVE_FATAL);
178 /* Now that the client callbacks have worked, remember them. */
179 a->client_opener = client_opener; /* Do we need to remember this? */
180 a->client_reader = client_reader;
181 a->client_skipper = client_skipper;
182 a->client_closer = client_closer;
183 a->client_data = client_data;
185 /* Select a decompression routine. */
186 high_bidder = choose_decompressor(a, buffer, bytes_read);
187 if (high_bidder < 0)
188 return (ARCHIVE_FATAL);
190 /* Initialize decompression routine with the first block of data. */
191 e = (a->decompressors[high_bidder].init)(a, buffer, bytes_read);
193 if (e == ARCHIVE_OK)
194 a->state = ARCHIVE_STATE_HEADER;
196 return (e);
200 * Allow each registered decompression routine to bid on whether it
201 * wants to handle this stream. Return index of winning bidder.
203 static int
204 choose_decompressor(struct archive *a, const void *buffer, size_t bytes_read)
206 int decompression_slots, i, bid, best_bid, best_bid_slot;
208 decompression_slots = sizeof(a->decompressors) /
209 sizeof(a->decompressors[0]);
211 best_bid = -1;
212 best_bid_slot = -1;
214 for (i = 0; i < decompression_slots; i++) {
215 if (a->decompressors[i].bid) {
216 bid = (a->decompressors[i].bid)(buffer, bytes_read);
217 if ((bid > best_bid) || (best_bid_slot < 0)) {
218 best_bid = bid;
219 best_bid_slot = i;
225 * There were no bidders; this is a serious programmer error
226 * and demands a quick and definitive abort.
228 if (best_bid_slot < 0)
229 __archive_errx(1, "No decompressors were registered; you "
230 "must call at least one "
231 "archive_read_support_compression_XXX function in order "
232 "to successfully read an archive.");
235 * There were bidders, but no non-zero bids; this means we can't
236 * support this stream.
238 if (best_bid < 1) {
239 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
240 "Unrecognized archive format");
241 return (ARCHIVE_FATAL);
244 return (best_bid_slot);
248 * Read header of next entry.
251 archive_read_next_header(struct archive *a, struct archive_entry **entryp)
253 struct archive_entry *entry;
254 int slot, ret;
256 __archive_check_magic(a, ARCHIVE_READ_MAGIC,
257 ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA, "archive_read_next_header");
259 *entryp = NULL;
260 entry = a->entry;
261 archive_entry_clear(entry);
262 archive_string_empty(&a->error_string);
265 * If client didn't consume entire data, skip any remainder
266 * (This is especially important for GNU incremental directories.)
268 if (a->state == ARCHIVE_STATE_DATA) {
269 ret = archive_read_data_skip(a);
270 if (ret == ARCHIVE_EOF) {
271 archive_set_error(a, EIO, "Premature end-of-file.");
272 a->state = ARCHIVE_STATE_FATAL;
273 return (ARCHIVE_FATAL);
275 if (ret != ARCHIVE_OK)
276 return (ret);
279 /* Record start-of-header. */
280 a->header_position = a->file_position;
282 slot = choose_format(a);
283 if (slot < 0) {
284 a->state = ARCHIVE_STATE_FATAL;
285 return (ARCHIVE_FATAL);
287 a->format = &(a->formats[slot]);
288 a->pformat_data = &(a->format->format_data);
289 ret = (a->format->read_header)(a, entry);
292 * EOF and FATAL are persistent at this layer. By
293 * modifying the state, we gaurantee that future calls to
294 * read a header or read data will fail.
296 switch (ret) {
297 case ARCHIVE_EOF:
298 a->state = ARCHIVE_STATE_EOF;
299 break;
300 case ARCHIVE_OK:
301 a->state = ARCHIVE_STATE_DATA;
302 break;
303 case ARCHIVE_WARN:
304 a->state = ARCHIVE_STATE_DATA;
305 break;
306 case ARCHIVE_RETRY:
307 break;
308 case ARCHIVE_FATAL:
309 a->state = ARCHIVE_STATE_FATAL;
310 break;
313 *entryp = entry;
314 a->read_data_output_offset = 0;
315 a->read_data_remaining = 0;
316 return (ret);
320 * Allow each registered format to bid on whether it wants to handle
321 * the next entry. Return index of winning bidder.
323 static int
324 choose_format(struct archive *a)
326 int slots;
327 int i;
328 int bid, best_bid;
329 int best_bid_slot;
331 slots = sizeof(a->formats) / sizeof(a->formats[0]);
332 best_bid = -1;
333 best_bid_slot = -1;
335 /* Set up a->format and a->pformat_data for convenience of bidders. */
336 a->format = &(a->formats[0]);
337 for (i = 0; i < slots; i++, a->format++) {
338 if (a->format->bid) {
339 a->pformat_data = &(a->format->format_data);
340 bid = (a->format->bid)(a);
341 if (bid == ARCHIVE_FATAL)
342 return (ARCHIVE_FATAL);
343 if ((bid > best_bid) || (best_bid_slot < 0)) {
344 best_bid = bid;
345 best_bid_slot = i;
351 * There were no bidders; this is a serious programmer error
352 * and demands a quick and definitive abort.
354 if (best_bid_slot < 0)
355 __archive_errx(1, "No formats were registered; you must "
356 "invoke at least one archive_read_support_format_XXX "
357 "function in order to successfully read an archive.");
360 * There were bidders, but no non-zero bids; this means we
361 * can't support this stream.
363 if (best_bid < 1) {
364 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
365 "Unrecognized archive format");
366 return (ARCHIVE_FATAL);
369 return (best_bid_slot);
373 * Return the file offset (within the uncompressed data stream) where
374 * the last header started.
376 int64_t
377 archive_read_header_position(struct archive *a)
379 return (a->header_position);
383 * Read data from an archive entry, using a read(2)-style interface.
384 * This is a convenience routine that just calls
385 * archive_read_data_block and copies the results into the client
386 * buffer, filling any gaps with zero bytes. Clients using this
387 * API can be completely ignorant of sparse-file issues; sparse files
388 * will simply be padded with nulls.
390 * DO NOT intermingle calls to this function and archive_read_data_block
391 * to read a single entry body.
393 ssize_t
394 archive_read_data(struct archive *a, void *buff, size_t s)
396 char *dest;
397 size_t bytes_read;
398 size_t len;
399 int r;
401 bytes_read = 0;
402 dest = buff;
404 while (s > 0) {
405 if (a->read_data_remaining <= 0) {
406 r = archive_read_data_block(a,
407 (const void **)&a->read_data_block,
408 &a->read_data_remaining,
409 &a->read_data_offset);
410 if (r == ARCHIVE_EOF)
411 return (bytes_read);
413 * Error codes are all negative, so the status
414 * return here cannot be confused with a valid
415 * byte count. (ARCHIVE_OK is zero.)
417 if (r < ARCHIVE_OK)
418 return (r);
421 if (a->read_data_offset < a->read_data_output_offset) {
422 archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
423 "Encountered out-of-order sparse blocks");
424 return (ARCHIVE_RETRY);
425 } else {
426 len = a->read_data_remaining;
427 if (len > s)
428 len = s;
429 memcpy(dest, a->read_data_block, len);
430 s -= len;
431 a->read_data_block += len;
432 a->read_data_remaining -= len;
433 a->read_data_output_offset += len;
434 a->read_data_offset += len;
435 dest += len;
436 bytes_read += len;
439 return (bytes_read);
443 * Skip over all remaining data in this entry.
446 archive_read_data_skip(struct archive *a)
448 int r;
449 const void *buff;
450 size_t size;
451 off_t offset;
453 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_skip");
455 if (a->format->read_data_skip != NULL)
456 r = (a->format->read_data_skip)(a);
457 else {
458 while ((r = archive_read_data_block(a, &buff, &size, &offset))
459 == ARCHIVE_OK)
463 if (r == ARCHIVE_EOF)
464 r = ARCHIVE_OK;
466 a->state = ARCHIVE_STATE_HEADER;
467 return (r);
471 * Read the next block of entry data from the archive.
472 * This is a zero-copy interface; the client receives a pointer,
473 * size, and file offset of the next available block of data.
475 * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if
476 * the end of entry is encountered.
479 archive_read_data_block(struct archive *a,
480 const void **buff, size_t *size, off_t *offset)
482 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA, "archive_read_data_block");
484 if (a->format->read_data == NULL) {
485 archive_set_error(a, ARCHIVE_ERRNO_PROGRAMMER,
486 "Internal error: "
487 "No format_read_data_block function registered");
488 return (ARCHIVE_FATAL);
491 return (a->format->read_data)(a, buff, size, offset);
495 * Close the file and release most resources.
497 * Be careful: client might just call read_new and then read_finish.
498 * Don't assume we actually read anything or performed any non-trivial
499 * initialization.
502 archive_read_close(struct archive *a)
504 int r = ARCHIVE_OK, r1 = ARCHIVE_OK;
506 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_close");
507 a->state = ARCHIVE_STATE_CLOSED;
509 /* Call cleanup functions registered by optional components. */
510 if (a->cleanup_archive_extract != NULL)
511 r = (a->cleanup_archive_extract)(a);
513 /* TODO: Finish the format processing. */
515 /* Close the input machinery. */
516 if (a->compression_finish != NULL) {
517 r1 = (a->compression_finish)(a);
518 if (r1 < r)
519 r = r1;
522 return (r);
526 * Release memory and other resources.
528 #if ARCHIVE_API_VERSION > 1
530 #else
531 /* Temporarily allow library to compile with either 1.x or 2.0 API. */
532 void
533 #endif
534 archive_read_finish(struct archive *a)
536 int i;
537 int slots;
538 int r = ARCHIVE_OK;
540 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY, "archive_read_finish");
541 if (a->state != ARCHIVE_STATE_CLOSED)
542 r = archive_read_close(a);
544 /* Cleanup format-specific data. */
545 slots = sizeof(a->formats) / sizeof(a->formats[0]);
546 for (i = 0; i < slots; i++) {
547 a->pformat_data = &(a->formats[i].format_data);
548 if (a->formats[i].cleanup)
549 (a->formats[i].cleanup)(a);
552 /* Casting a pointer to int allows us to remove 'const.' */
553 free((void *)(uintptr_t)(const void *)a->nulls);
554 archive_string_free(&a->error_string);
555 if (a->entry)
556 archive_entry_free(a->entry);
557 a->magic = 0;
558 free(a);
559 #if ARCHIVE_API_VERSION > 1
560 return (r);
561 #endif
565 * Used internally by read format handlers to register their bid and
566 * initialization functions.
569 __archive_read_register_format(struct archive *a,
570 void *format_data,
571 int (*bid)(struct archive *),
572 int (*read_header)(struct archive *, struct archive_entry *),
573 int (*read_data)(struct archive *, const void **, size_t *, off_t *),
574 int (*read_data_skip)(struct archive *),
575 int (*cleanup)(struct archive *))
577 int i, number_slots;
579 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "__archive_read_register_format");
581 number_slots = sizeof(a->formats) / sizeof(a->formats[0]);
583 for (i = 0; i < number_slots; i++) {
584 if (a->formats[i].bid == bid)
585 return (ARCHIVE_WARN); /* We've already installed */
586 if (a->formats[i].bid == NULL) {
587 a->formats[i].bid = bid;
588 a->formats[i].read_header = read_header;
589 a->formats[i].read_data = read_data;
590 a->formats[i].read_data_skip = read_data_skip;
591 a->formats[i].cleanup = cleanup;
592 a->formats[i].format_data = format_data;
593 return (ARCHIVE_OK);
597 __archive_errx(1, "Not enough slots for format registration");
598 return (ARCHIVE_FATAL); /* Never actually called. */
602 * Used internally by decompression routines to register their bid and
603 * initialization functions.
606 __archive_read_register_compression(struct archive *a,
607 int (*bid)(const void *, size_t),
608 int (*init)(struct archive *, const void *, size_t))
610 int i, number_slots;
612 __archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "__archive_read_register_compression");
614 number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]);
616 for (i = 0; i < number_slots; i++) {
617 if (a->decompressors[i].bid == bid)
618 return (ARCHIVE_OK); /* We've already installed */
619 if (a->decompressors[i].bid == NULL) {
620 a->decompressors[i].bid = bid;
621 a->decompressors[i].init = init;
622 return (ARCHIVE_OK);
626 __archive_errx(1, "Not enough slots for compression registration");
627 return (ARCHIVE_FATAL); /* Never actually executed. */