2 * Copyright (c) 2003-2004 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * This file contains the "essential" portions of the read API, that
29 * is, stuff that will probably always be used by any client that
30 * actually needs to read an archive. Optional pieces have been, as
31 * far as possible, separated out into separate files to avoid
32 * needlessly bloating statically-linked clients.
35 #include "archive_platform.h"
36 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read.c,v 1.22 2006/09/05 05:59:45 kientzle Exp $");
45 #include "archive_entry.h"
46 #include "archive_private.h"
/* Bidding helpers used by the open and next-header paths below. */
static int	choose_decompressor(struct archive *a, const void *buffer,
		    size_t bytes_read);
static int	choose_format(struct archive *a);
52 * Allocate, initialize and return a struct archive object.
55 archive_read_new(void)
60 a
= malloc(sizeof(*a
));
62 archive_set_error(a
, ENOMEM
, "Can't allocate archive object");
65 memset(a
, 0, sizeof(*a
));
67 a
->user_uid
= geteuid();
68 a
->magic
= ARCHIVE_READ_MAGIC
;
69 a
->bytes_per_block
= ARCHIVE_DEFAULT_BYTES_PER_BLOCK
;
71 a
->null_length
= 1024;
72 nulls
= malloc(a
->null_length
);
74 archive_set_error(a
, ENOMEM
, "Can't allocate archive object 'nulls' element");
78 memset(nulls
, 0, a
->null_length
);
81 a
->state
= ARCHIVE_STATE_NEW
;
82 a
->entry
= archive_entry_new();
84 /* We always support uncompressed archives. */
85 archive_read_support_compression_none((struct archive
*)a
);
91 * Record the do-not-extract-to file. This belongs in archive_read_extract.c.
94 archive_read_extract_set_skip_file(struct archive
*a
, dev_t d
, ino_t i
)
96 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_ANY
, "archive_read_extract_set_skip_file");
106 archive_read_open(struct archive
*a
, void *client_data
,
107 archive_open_callback
*client_opener
, archive_read_callback
*client_reader
,
108 archive_close_callback
*client_closer
)
110 /* Old archive_read_open() is just a thin shell around
111 * archive_read_open2. */
112 return archive_read_open2(a
, client_data
, client_opener
,
113 client_reader
, NULL
, client_closer
);
117 archive_read_open2(struct archive
*a
, void *client_data
,
118 archive_open_callback
*client_opener
,
119 archive_read_callback
*client_reader
,
120 archive_skip_callback
*client_skipper
,
121 archive_close_callback
*client_closer
)
128 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_NEW
, "archive_read_open");
130 if (client_reader
== NULL
)
132 "No reader function provided to archive_read_open");
135 * Set these NULL initially. If the open or initial read fails,
136 * we'll leave them NULL to indicate that the file is invalid.
137 * (In particular, this helps ensure that the closer doesn't
138 * get called more than once.)
140 a
->client_opener
= NULL
;
141 a
->client_reader
= NULL
;
142 a
->client_skipper
= NULL
;
143 a
->client_closer
= NULL
;
144 a
->client_data
= NULL
;
146 /* Open data source. */
147 if (client_opener
!= NULL
) {
148 e
=(client_opener
)(a
, client_data
);
150 /* If the open failed, call the closer to clean up. */
152 (client_closer
)(a
, client_data
);
157 /* Read first block now for format detection. */
158 bytes_read
= (client_reader
)(a
, client_data
, &buffer
);
160 if (bytes_read
< 0) {
161 /* If the first read fails, close before returning error. */
163 (client_closer
)(a
, client_data
);
164 /* client_reader should have already set error information. */
165 return (ARCHIVE_FATAL
);
168 /* An empty archive is a serious error. */
169 if (bytes_read
== 0) {
170 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
172 /* Close the empty file. */
174 (client_closer
)(a
, client_data
);
175 return (ARCHIVE_FATAL
);
178 /* Now that the client callbacks have worked, remember them. */
179 a
->client_opener
= client_opener
; /* Do we need to remember this? */
180 a
->client_reader
= client_reader
;
181 a
->client_skipper
= client_skipper
;
182 a
->client_closer
= client_closer
;
183 a
->client_data
= client_data
;
185 /* Select a decompression routine. */
186 high_bidder
= choose_decompressor(a
, buffer
, bytes_read
);
188 return (ARCHIVE_FATAL
);
190 /* Initialize decompression routine with the first block of data. */
191 e
= (a
->decompressors
[high_bidder
].init
)(a
, buffer
, bytes_read
);
194 a
->state
= ARCHIVE_STATE_HEADER
;
200 * Allow each registered decompression routine to bid on whether it
201 * wants to handle this stream. Return index of winning bidder.
204 choose_decompressor(struct archive
*a
, const void *buffer
, size_t bytes_read
)
206 int decompression_slots
, i
, bid
, best_bid
, best_bid_slot
;
208 decompression_slots
= sizeof(a
->decompressors
) /
209 sizeof(a
->decompressors
[0]);
214 for (i
= 0; i
< decompression_slots
; i
++) {
215 if (a
->decompressors
[i
].bid
) {
216 bid
= (a
->decompressors
[i
].bid
)(buffer
, bytes_read
);
217 if ((bid
> best_bid
) || (best_bid_slot
< 0)) {
225 * There were no bidders; this is a serious programmer error
226 * and demands a quick and definitive abort.
228 if (best_bid_slot
< 0)
229 __archive_errx(1, "No decompressors were registered; you "
230 "must call at least one "
231 "archive_read_support_compression_XXX function in order "
232 "to successfully read an archive.");
235 * There were bidders, but no non-zero bids; this means we can't
236 * support this stream.
239 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
240 "Unrecognized archive format");
241 return (ARCHIVE_FATAL
);
244 return (best_bid_slot
);
248 * Read header of next entry.
251 archive_read_next_header(struct archive
*a
, struct archive_entry
**entryp
)
253 struct archive_entry
*entry
;
256 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
,
257 ARCHIVE_STATE_HEADER
| ARCHIVE_STATE_DATA
, "archive_read_next_header");
261 archive_entry_clear(entry
);
262 archive_string_empty(&a
->error_string
);
265 * If client didn't consume entire data, skip any remainder
266 * (This is especially important for GNU incremental directories.)
268 if (a
->state
== ARCHIVE_STATE_DATA
) {
269 ret
= archive_read_data_skip(a
);
270 if (ret
== ARCHIVE_EOF
) {
271 archive_set_error(a
, EIO
, "Premature end-of-file.");
272 a
->state
= ARCHIVE_STATE_FATAL
;
273 return (ARCHIVE_FATAL
);
275 if (ret
!= ARCHIVE_OK
)
279 /* Record start-of-header. */
280 a
->header_position
= a
->file_position
;
282 slot
= choose_format(a
);
284 a
->state
= ARCHIVE_STATE_FATAL
;
285 return (ARCHIVE_FATAL
);
287 a
->format
= &(a
->formats
[slot
]);
288 a
->pformat_data
= &(a
->format
->format_data
);
289 ret
= (a
->format
->read_header
)(a
, entry
);
292 * EOF and FATAL are persistent at this layer. By
293 * modifying the state, we gaurantee that future calls to
294 * read a header or read data will fail.
298 a
->state
= ARCHIVE_STATE_EOF
;
301 a
->state
= ARCHIVE_STATE_DATA
;
304 a
->state
= ARCHIVE_STATE_DATA
;
309 a
->state
= ARCHIVE_STATE_FATAL
;
314 a
->read_data_output_offset
= 0;
315 a
->read_data_remaining
= 0;
320 * Allow each registered format to bid on whether it wants to handle
321 * the next entry. Return index of winning bidder.
324 choose_format(struct archive
*a
)
331 slots
= sizeof(a
->formats
) / sizeof(a
->formats
[0]);
335 /* Set up a->format and a->pformat_data for convenience of bidders. */
336 a
->format
= &(a
->formats
[0]);
337 for (i
= 0; i
< slots
; i
++, a
->format
++) {
338 if (a
->format
->bid
) {
339 a
->pformat_data
= &(a
->format
->format_data
);
340 bid
= (a
->format
->bid
)(a
);
341 if (bid
== ARCHIVE_FATAL
)
342 return (ARCHIVE_FATAL
);
343 if ((bid
> best_bid
) || (best_bid_slot
< 0)) {
351 * There were no bidders; this is a serious programmer error
352 * and demands a quick and definitive abort.
354 if (best_bid_slot
< 0)
355 __archive_errx(1, "No formats were registered; you must "
356 "invoke at least one archive_read_support_format_XXX "
357 "function in order to successfully read an archive.");
360 * There were bidders, but no non-zero bids; this means we
361 * can't support this stream.
364 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
365 "Unrecognized archive format");
366 return (ARCHIVE_FATAL
);
369 return (best_bid_slot
);
373 * Return the file offset (within the uncompressed data stream) where
374 * the last header started.
377 archive_read_header_position(struct archive
*a
)
379 return (a
->header_position
);
383 * Read data from an archive entry, using a read(2)-style interface.
384 * This is a convenience routine that just calls
385 * archive_read_data_block and copies the results into the client
386 * buffer, filling any gaps with zero bytes. Clients using this
387 * API can be completely ignorant of sparse-file issues; sparse files
388 * will simply be padded with nulls.
390 * DO NOT intermingle calls to this function and archive_read_data_block
391 * to read a single entry body.
394 archive_read_data(struct archive
*a
, void *buff
, size_t s
)
405 if (a
->read_data_remaining
<= 0) {
406 r
= archive_read_data_block(a
,
407 (const void **)&a
->read_data_block
,
408 &a
->read_data_remaining
,
409 &a
->read_data_offset
);
410 if (r
== ARCHIVE_EOF
)
413 * Error codes are all negative, so the status
414 * return here cannot be confused with a valid
415 * byte count. (ARCHIVE_OK is zero.)
421 if (a
->read_data_offset
< a
->read_data_output_offset
) {
422 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
423 "Encountered out-of-order sparse blocks");
424 return (ARCHIVE_RETRY
);
426 len
= a
->read_data_remaining
;
429 memcpy(dest
, a
->read_data_block
, len
);
431 a
->read_data_block
+= len
;
432 a
->read_data_remaining
-= len
;
433 a
->read_data_output_offset
+= len
;
434 a
->read_data_offset
+= len
;
443 * Skip over all remaining data in this entry.
446 archive_read_data_skip(struct archive
*a
)
453 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_DATA
, "archive_read_data_skip");
455 if (a
->format
->read_data_skip
!= NULL
)
456 r
= (a
->format
->read_data_skip
)(a
);
458 while ((r
= archive_read_data_block(a
, &buff
, &size
, &offset
))
463 if (r
== ARCHIVE_EOF
)
466 a
->state
= ARCHIVE_STATE_HEADER
;
471 * Read the next block of entry data from the archive.
472 * This is a zero-copy interface; the client receives a pointer,
473 * size, and file offset of the next available block of data.
475 * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if
476 * the end of entry is encountered.
479 archive_read_data_block(struct archive
*a
,
480 const void **buff
, size_t *size
, off_t
*offset
)
482 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_DATA
, "archive_read_data_block");
484 if (a
->format
->read_data
== NULL
) {
485 archive_set_error(a
, ARCHIVE_ERRNO_PROGRAMMER
,
487 "No format_read_data_block function registered");
488 return (ARCHIVE_FATAL
);
491 return (a
->format
->read_data
)(a
, buff
, size
, offset
);
495 * Close the file and release most resources.
497 * Be careful: client might just call read_new and then read_finish.
498 * Don't assume we actually read anything or performed any non-trivial
502 archive_read_close(struct archive
*a
)
504 int r
= ARCHIVE_OK
, r1
= ARCHIVE_OK
;
506 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_ANY
, "archive_read_close");
507 a
->state
= ARCHIVE_STATE_CLOSED
;
509 /* Call cleanup functions registered by optional components. */
510 if (a
->cleanup_archive_extract
!= NULL
)
511 r
= (a
->cleanup_archive_extract
)(a
);
513 /* TODO: Finish the format processing. */
515 /* Close the input machinery. */
516 if (a
->compression_finish
!= NULL
) {
517 r1
= (a
->compression_finish
)(a
);
526 * Release memory and other resources.
528 #if ARCHIVE_API_VERSION > 1
531 /* Temporarily allow library to compile with either 1.x or 2.0 API. */
534 archive_read_finish(struct archive
*a
)
540 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_ANY
, "archive_read_finish");
541 if (a
->state
!= ARCHIVE_STATE_CLOSED
)
542 r
= archive_read_close(a
);
544 /* Cleanup format-specific data. */
545 slots
= sizeof(a
->formats
) / sizeof(a
->formats
[0]);
546 for (i
= 0; i
< slots
; i
++) {
547 a
->pformat_data
= &(a
->formats
[i
].format_data
);
548 if (a
->formats
[i
].cleanup
)
549 (a
->formats
[i
].cleanup
)(a
);
552 /* Casting a pointer to int allows us to remove 'const.' */
553 free((void *)(uintptr_t)(const void *)a
->nulls
);
554 archive_string_free(&a
->error_string
);
556 archive_entry_free(a
->entry
);
559 #if ARCHIVE_API_VERSION > 1
565 * Used internally by read format handlers to register their bid and
566 * initialization functions.
569 __archive_read_register_format(struct archive
*a
,
571 int (*bid
)(struct archive
*),
572 int (*read_header
)(struct archive
*, struct archive_entry
*),
573 int (*read_data
)(struct archive
*, const void **, size_t *, off_t
*),
574 int (*read_data_skip
)(struct archive
*),
575 int (*cleanup
)(struct archive
*))
579 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_NEW
, "__archive_read_register_format");
581 number_slots
= sizeof(a
->formats
) / sizeof(a
->formats
[0]);
583 for (i
= 0; i
< number_slots
; i
++) {
584 if (a
->formats
[i
].bid
== bid
)
585 return (ARCHIVE_WARN
); /* We've already installed */
586 if (a
->formats
[i
].bid
== NULL
) {
587 a
->formats
[i
].bid
= bid
;
588 a
->formats
[i
].read_header
= read_header
;
589 a
->formats
[i
].read_data
= read_data
;
590 a
->formats
[i
].read_data_skip
= read_data_skip
;
591 a
->formats
[i
].cleanup
= cleanup
;
592 a
->formats
[i
].format_data
= format_data
;
597 __archive_errx(1, "Not enough slots for format registration");
598 return (ARCHIVE_FATAL
); /* Never actually called. */
602 * Used internally by decompression routines to register their bid and
603 * initialization functions.
606 __archive_read_register_compression(struct archive
*a
,
607 int (*bid
)(const void *, size_t),
608 int (*init
)(struct archive
*, const void *, size_t))
612 __archive_check_magic(a
, ARCHIVE_READ_MAGIC
, ARCHIVE_STATE_NEW
, "__archive_read_register_compression");
614 number_slots
= sizeof(a
->decompressors
) / sizeof(a
->decompressors
[0]);
616 for (i
= 0; i
< number_slots
; i
++) {
617 if (a
->decompressors
[i
].bid
== bid
)
618 return (ARCHIVE_OK
); /* We've already installed */
619 if (a
->decompressors
[i
].bid
== NULL
) {
620 a
->decompressors
[i
].bid
= bid
;
621 a
->decompressors
[i
].init
= init
;
626 __archive_errx(1, "Not enough slots for compression registration");
627 return (ARCHIVE_FATAL
); /* Never actually executed. */