2 * Copyright (c) 2004 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
28 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_zip.c,v 1.8 2005/10/12 15:38:45 kientzle Exp $");
40 #include "archive_entry.h"
41 #include "archive_private.h"
44 /* entry_bytes_remaining is the number of bytes we expect. */
45 off_t entry_bytes_remaining
;
48 /* These count the number of bytes actually read for the entry. */
49 off_t entry_compressed_bytes_read
;
50 off_t entry_uncompressed_bytes_read
;
56 const char * compression_name
;
64 /* Flags to mark progress of decompression. */
67 char end_of_entry_cleanup
;
70 ssize_t filename_length
;
72 off_t uncompressed_size
;
73 off_t compressed_size
;
75 unsigned char *uncompressed_buffer
;
76 size_t uncompressed_buffer_size
;
81 struct archive_string pathname
;
82 struct archive_string extra
;
86 #define ZIP_LENGTH_AT_END 8
88 struct zip_file_header
{
95 char compressed_size
[4];
96 char uncompressed_size
[4];
97 char filename_length
[2];
101 static const char *compression_names
[] = {
113 static int archive_read_format_zip_bid(struct archive
*);
114 static int archive_read_format_zip_cleanup(struct archive
*);
115 static int archive_read_format_zip_read_data(struct archive
*,
116 const void **, size_t *, off_t
*);
117 static int archive_read_format_zip_read_data_skip(struct archive
*a
);
118 static int archive_read_format_zip_read_header(struct archive
*,
119 struct archive_entry
*);
120 static int i2(const char *);
121 static int i4(const char *);
122 static unsigned int u2(const char *);
123 static unsigned int u4(const char *);
124 static uint64_t u8(const char *);
125 static int zip_read_data_deflate(struct archive
*a
, const void **buff
,
126 size_t *size
, off_t
*offset
);
127 static int zip_read_data_none(struct archive
*a
, const void **buff
,
128 size_t *size
, off_t
*offset
);
129 static int zip_read_file_header(struct archive
*a
,
130 struct archive_entry
*entry
, struct zip
*zip
);
131 static time_t zip_time(const char *);
132 static void process_extra(const void* extra
, struct zip
* zip
);
135 archive_read_support_format_zip(struct archive
*a
)
140 zip
= malloc(sizeof(*zip
));
142 archive_set_error(a
, ENOMEM
, "Can't allocate zip data");
143 return (ARCHIVE_FATAL
);
145 memset(zip
, 0, sizeof(*zip
));
147 r
= __archive_read_register_format(a
,
149 archive_read_format_zip_bid
,
150 archive_read_format_zip_read_header
,
151 archive_read_format_zip_read_data
,
152 archive_read_format_zip_read_data_skip
,
153 archive_read_format_zip_cleanup
);
162 archive_read_format_zip_bid(struct archive
*a
)
169 if (a
->archive_format
== ARCHIVE_FORMAT_ZIP
)
172 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, 4);
177 if (p
[0] == 'P' && p
[1] == 'K') {
179 if (p
[2] == '\001' && p
[3] == '\002')
181 else if (p
[2] == '\003' && p
[3] == '\004')
183 else if (p
[2] == '\005' && p
[3] == '\006')
185 else if (p
[2] == '\007' && p
[3] == '\010')
192 archive_read_format_zip_read_header(struct archive
*a
,
193 struct archive_entry
*entry
)
197 const char *signature
;
200 a
->archive_format
= ARCHIVE_FORMAT_ZIP
;
201 if (a
->archive_format_name
== NULL
)
202 a
->archive_format_name
= "ZIP";
204 zip
= *(a
->pformat_data
);
205 zip
->decompress_init
= 0;
206 zip
->end_of_entry
= 0;
207 zip
->end_of_entry_cleanup
= 0;
208 zip
->entry_uncompressed_bytes_read
= 0;
209 zip
->entry_compressed_bytes_read
= 0;
210 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, 4);
212 return (ARCHIVE_FATAL
);
215 if (signature
[0] != 'P' || signature
[1] != 'K') {
216 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
218 return (ARCHIVE_FATAL
);
221 if (signature
[2] == '\001' && signature
[3] == '\002') {
222 /* Beginning of central directory. */
223 return (ARCHIVE_EOF
);
226 if (signature
[2] == '\003' && signature
[3] == '\004') {
227 /* Regular file entry. */
228 return (zip_read_file_header(a
, entry
, zip
));
231 if (signature
[2] == '\005' && signature
[3] == '\006') {
232 /* End-of-archive record. */
233 return (ARCHIVE_EOF
);
236 if (signature
[2] == '\007' && signature
[3] == '\010') {
238 * We should never encounter this record here;
239 * see ZIP_LENGTH_AT_END handling below for details.
241 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
242 "Bad ZIP file: Unexpected end-of-entry record");
243 return (ARCHIVE_FATAL
);
246 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
247 "Damaged ZIP file or unsupported format variant (%d,%d)",
248 signature
[2], signature
[3]);
249 return (ARCHIVE_FATAL
);
253 zip_read_file_header(struct archive
*a
, struct archive_entry
*entry
,
256 const struct zip_file_header
*p
;
262 (a
->compression_read_ahead
)(a
, &h
, sizeof(struct zip_file_header
));
263 if (bytes_read
< (int)sizeof(struct zip_file_header
)) {
264 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
265 "Truncated ZIP file header");
266 return (ARCHIVE_FATAL
);
270 zip
->version
= p
->version
[0];
271 zip
->system
= p
->version
[1];
272 zip
->flags
= i2(p
->flags
);
273 zip
->compression
= i2(p
->compression
);
274 if (zip
->compression
<
275 sizeof(compression_names
)/sizeof(compression_names
[0]))
276 zip
->compression_name
= compression_names
[zip
->compression
];
278 zip
->compression_name
= "??";
279 zip
->mtime
= zip_time(p
->timedate
);
285 zip
->crc32
= i4(p
->crc32
);
286 zip
->filename_length
= i2(p
->filename_length
);
287 zip
->extra_length
= i2(p
->extra_length
);
288 zip
->uncompressed_size
= u4(p
->uncompressed_size
);
289 zip
->compressed_size
= u4(p
->compressed_size
);
291 (a
->compression_read_consume
)(a
, sizeof(struct zip_file_header
));
294 /* Read the filename. */
295 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, zip
->filename_length
);
296 if (bytes_read
< zip
->filename_length
) {
297 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
298 "Truncated ZIP file header");
299 return (ARCHIVE_FATAL
);
301 archive_string_ensure(&zip
->pathname
, zip
->filename_length
);
302 archive_strncpy(&zip
->pathname
, h
, zip
->filename_length
);
303 (a
->compression_read_consume
)(a
, zip
->filename_length
);
304 archive_entry_set_pathname(entry
, zip
->pathname
.s
);
306 if (zip
->pathname
.s
[archive_strlen(&zip
->pathname
) - 1] == '/')
307 zip
->mode
= S_IFDIR
| 0777;
309 zip
->mode
= S_IFREG
| 0777;
311 /* Read the extra data. */
312 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, zip
->extra_length
);
313 if (bytes_read
< zip
->extra_length
) {
314 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
315 "Truncated ZIP file header");
316 return (ARCHIVE_FATAL
);
318 process_extra(h
, zip
);
319 (a
->compression_read_consume
)(a
, zip
->extra_length
);
321 /* Populate some additional entry fields: */
322 memset(&st
, 0, sizeof(st
));
323 st
.st_mode
= zip
->mode
;
324 st
.st_uid
= zip
->uid
;
325 st
.st_gid
= zip
->gid
;
326 st
.st_mtime
= zip
->mtime
;
327 st
.st_ctime
= zip
->ctime
;
328 st
.st_atime
= zip
->atime
;
329 st
.st_size
= zip
->uncompressed_size
;
330 archive_entry_copy_stat(entry
, &st
);
332 zip
->entry_bytes_remaining
= zip
->compressed_size
;
333 zip
->entry_offset
= 0;
335 /* Set up a more descriptive format name. */
336 sprintf(zip
->format_name
, "ZIP %d.%d (%s)",
337 zip
->version
/ 10, zip
->version
% 10,
338 zip
->compression_name
);
339 a
->archive_format_name
= zip
->format_name
;
/*
 * Convert an MSDOS-style date/time into Unix-style time.
 *
 * 'p' points at four bytes: a 16-bit little-endian DOS time word
 * followed by a 16-bit little-endian DOS date word.
 *
 *   time word: bits 15-11 hour, bits 10-5 minute, bits 4-0 seconds / 2
 *   date word: bits 15-9 years since 1980, bits 8-5 month (1-12),
 *              bits 4-0 day of month
 *
 * The fields are interpreted as local time.
 *
 * NOTE(review): the declaration and the final conversion step of this
 * function were not visible in the reviewed listing; the tm_isdst = -1 /
 * mktime() tail below is restored and should be confirmed against the
 * upstream file.
 */
static time_t
zip_time(const char *p)
{
	int msTime, msDate;
	struct tm ts;

	/* Assemble the two little-endian 16-bit words byte by byte. */
	msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]);
	msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]);

	memset(&ts, 0, sizeof(ts));
	ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
	ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
	ts.tm_mday = msDate & 0x1f; /* Day of month. */
	ts.tm_hour = (msTime >> 11) & 0x1f;
	ts.tm_min = (msTime >> 5) & 0x3f;
	/* Seconds are stored halved, so only even values are possible. */
	ts.tm_sec = (msTime << 1) & 0x3e;
	/* Let mktime() decide whether daylight saving time applies. */
	ts.tm_isdst = -1;
	return mktime(&ts);
}
366 archive_read_format_zip_read_data(struct archive
*a
,
367 const void **buff
, size_t *size
, off_t
*offset
)
372 zip
= *(a
->pformat_data
);
375 * If we hit end-of-entry last time, clean up and return
376 * ARCHIVE_EOF this time.
378 if (zip
->end_of_entry
) {
379 if (!zip
->end_of_entry_cleanup
) {
380 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
384 (a
->compression_read_ahead
)(a
, &h
, 16);
385 if (bytes_read
< 16) {
387 ARCHIVE_ERRNO_FILE_FORMAT
,
388 "Truncated ZIP end-of-file record");
389 return (ARCHIVE_FATAL
);
392 zip
->crc32
= i4(p
+ 4);
393 zip
->compressed_size
= u4(p
+ 8);
394 zip
->uncompressed_size
= u4(p
+ 12);
395 bytes_read
= (a
->compression_read_consume
)(a
, 16);
398 /* Check file size, CRC against these values. */
399 if (zip
->compressed_size
!= zip
->entry_compressed_bytes_read
) {
400 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
401 "ZIP compressed data is wrong size");
402 return (ARCHIVE_WARN
);
404 if (zip
->uncompressed_size
!= zip
->entry_uncompressed_bytes_read
) {
405 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
406 "ZIP uncompressed data is wrong size");
407 return (ARCHIVE_WARN
);
409 /* TODO: Compute CRC. */
411 if (zip->crc32 != zip->entry_crc32_calculated) {
412 archive_set_error(a, ARCHIVE_ERRNO_MISC,
413 "ZIP data CRC error");
414 return (ARCHIVE_WARN);
417 /* End-of-entry cleanup done. */
418 zip
->end_of_entry_cleanup
= 1;
420 return (ARCHIVE_EOF
);
423 switch(zip
->compression
) {
424 case 0: /* No compression. */
425 r
= zip_read_data_none(a
, buff
, size
, offset
);
427 case 8: /* Deflate compression. */
428 r
= zip_read_data_deflate(a
, buff
, size
, offset
);
430 default: /* Unsupported compression. */
434 /* Return a warning. */
435 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
436 "Unsupported ZIP compression method (%s)",
437 zip
->compression_name
);
438 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
440 * ZIP_LENGTH_AT_END requires us to
441 * decompress the entry in order to
442 * skip it, but we don't know this
443 * compression method, so we give up.
447 /* We know compressed size; just skip it. */
448 archive_read_format_zip_read_data_skip(a
);
/*
 * Read "uncompressed" data.  According to the current specification,
 * if ZIP_LENGTH_AT_END is specified, then the size fields in the
 * initial file header are supposed to be set to zero.  This would, of
 * course, make it impossible for us to read the archive, since we
 * couldn't determine the end of the file data.  Info-ZIP seems to
 * include the real size fields both before and after the data in this
 * case (the CRC only appears afterwards), so this works as you would
 * expect.
 *
 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
 * zip->end_of_entry if it consumes all of the data.
 */
470 zip_read_data_none(struct archive
*a
, const void **buff
,
471 size_t *size
, off_t
*offset
)
476 zip
= *(a
->pformat_data
);
478 if (zip
->entry_bytes_remaining
== 0) {
481 *offset
= zip
->entry_offset
;
482 zip
->end_of_entry
= 1;
486 * Note: '1' here is a performance optimization.
487 * Recall that the decompression layer returns a count of
488 * available bytes; asking for more than that forces the
489 * decompressor to combine reads by copying data.
491 bytes_avail
= (a
->compression_read_ahead
)(a
, buff
, 1);
492 if (bytes_avail
<= 0) {
493 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
494 "Truncated ZIP file data");
495 return (ARCHIVE_FATAL
);
497 if (bytes_avail
> zip
->entry_bytes_remaining
)
498 bytes_avail
= zip
->entry_bytes_remaining
;
499 (a
->compression_read_consume
)(a
, bytes_avail
);
501 *offset
= zip
->entry_offset
;
502 zip
->entry_offset
+= *size
;
503 zip
->entry_bytes_remaining
-= *size
;
504 zip
->entry_uncompressed_bytes_read
+= *size
;
505 zip
->entry_compressed_bytes_read
+= *size
;
511 zip_read_data_deflate(struct archive
*a
, const void **buff
,
512 size_t *size
, off_t
*offset
)
516 const void *compressed_buff
;
519 zip
= *(a
->pformat_data
);
521 /* If the buffer hasn't been allocated, allocate it now. */
522 if (zip
->uncompressed_buffer
== NULL
) {
523 zip
->uncompressed_buffer_size
= 32 * 1024;
524 zip
->uncompressed_buffer
525 = malloc(zip
->uncompressed_buffer_size
);
526 if (zip
->uncompressed_buffer
== NULL
) {
527 archive_set_error(a
, ENOMEM
,
528 "No memory for ZIP decompression");
529 return (ARCHIVE_FATAL
);
533 /* If we haven't yet read any data, initialize the decompressor. */
534 if (!zip
->decompress_init
) {
535 r
= inflateInit2(&zip
->stream
,
536 -15 /* Don't check for zlib header */);
538 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
539 "Can't initialize ZIP decompression.");
540 return (ARCHIVE_FATAL
);
542 zip
->decompress_init
= 1;
546 * Note: '1' here is a performance optimization.
547 * Recall that the decompression layer returns a count of
548 * available bytes; asking for more than that forces the
549 * decompressor to combine reads by copying data.
551 bytes_avail
= (a
->compression_read_ahead
)(a
, &compressed_buff
, 1);
552 if (bytes_avail
<= 0) {
553 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
554 "Truncated ZIP file body");
555 return (ARCHIVE_FATAL
);
559 * A bug in zlib.h: stream.next_in should be marked 'const'
560 * but isn't (the library never alters data through the
561 * next_in pointer, only reads it). The result: this ugly
562 * cast to remove 'const'.
564 zip
->stream
.next_in
= (void *)(uintptr_t)(const void *)compressed_buff
;
565 zip
->stream
.avail_in
= bytes_avail
;
566 zip
->stream
.total_in
= 0;
567 zip
->stream
.next_out
= zip
->uncompressed_buffer
;
568 zip
->stream
.avail_out
= zip
->uncompressed_buffer_size
;
569 zip
->stream
.total_out
= 0;
571 r
= inflate(&zip
->stream
, 0);
576 zip
->end_of_entry
= 1;
579 archive_set_error(a
, ENOMEM
,
580 "Out of memory for ZIP decompression");
581 return (ARCHIVE_FATAL
);
583 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
584 "ZIP decompression failed (%d)", r
);
585 return (ARCHIVE_FATAL
);
588 /* Consume as much as the compressor actually used. */
589 bytes_avail
= zip
->stream
.total_in
;
590 (a
->compression_read_consume
)(a
, bytes_avail
);
591 zip
->entry_bytes_remaining
-= bytes_avail
;
592 zip
->entry_compressed_bytes_read
+= bytes_avail
;
594 *offset
= zip
->entry_offset
;
595 *size
= zip
->stream
.total_out
;
596 zip
->entry_uncompressed_bytes_read
+= *size
;
597 *buff
= zip
->uncompressed_buffer
;
598 zip
->entry_offset
+= *size
;
603 zip_read_data_deflate(struct archive
*a
, const void **buff
,
604 size_t *size
, off_t
*offset
)
611 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
612 "libarchive compiled without deflate support (no libz)");
613 return (ARCHIVE_FATAL
);
618 archive_read_format_zip_read_data_skip(struct archive
*a
)
621 const void *buff
= NULL
;
624 zip
= *(a
->pformat_data
);
627 * If the length is at the end, we have no choice but
628 * to decompress all the data to find the end marker.
630 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
635 r
= archive_read_format_zip_read_data(a
, &buff
,
637 } while (r
== ARCHIVE_OK
);
642 * If the length is at the beginning, we can skip the
643 * compressed data much more quickly.
645 while (zip
->entry_bytes_remaining
> 0) {
646 bytes_avail
= (a
->compression_read_ahead
)(a
, &buff
, 1);
647 if (bytes_avail
<= 0) {
648 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
649 "Truncated ZIP file body");
650 return (ARCHIVE_FATAL
);
652 if (bytes_avail
> zip
->entry_bytes_remaining
)
653 bytes_avail
= zip
->entry_bytes_remaining
;
654 (a
->compression_read_consume
)(a
, bytes_avail
);
655 zip
->entry_bytes_remaining
-= bytes_avail
;
657 /* This entry is finished and done. */
658 zip
->end_of_entry_cleanup
= zip
->end_of_entry
= 1;
663 archive_read_format_zip_cleanup(struct archive
*a
)
667 zip
= *(a
->pformat_data
);
668 if (zip
->uncompressed_buffer
!= NULL
)
669 free(zip
->uncompressed_buffer
);
670 archive_string_free(&(zip
->pathname
));
671 archive_string_free(&(zip
->extra
));
673 *(a
->pformat_data
) = NULL
;
/*
 * Little-endian integer decoding helpers.
 *
 * ZIP stores multi-byte header fields least-significant byte first.  Each
 * helper reads a fixed number of bytes starting at 'p' and never touches
 * memory beyond that; the caller must guarantee that many bytes are
 * readable.  All arithmetic is done on unsigned values so that neither the
 * signedness of plain 'char' nor a set top bit can trigger signed overflow.
 */

/* Decode 2 bytes as a little-endian value; the result is in 0..65535. */
static int
i2(const char *p)
{
	const unsigned char *b = (const unsigned char *)p;

	return ((int)b[0] | ((int)b[1] << 8));
}

/*
 * Decode 4 bytes as a little-endian two's-complement signed value.
 *
 * The value is assembled unsigned and then mapped onto 'int' explicitly,
 * so inputs with the top bit set yield the expected negative number
 * instead of overflowing a signed multiplication.
 */
static int
i4(const char *p)
{
	const unsigned char *b = (const unsigned char *)p;
	uint32_t v;

	v = (uint32_t)b[0] | ((uint32_t)b[1] << 8) |
	    ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24);
	if (v <= 0x7fffffffu)
		return ((int)v);
	/* Negative range: compute -(2^32 - v) without leaving 'int'. */
	return (-(int)(0xffffffffu - v) - 1);
}

/* Decode 2 bytes as an unsigned little-endian value. */
static unsigned int
u2(const char *p)
{
	const unsigned char *b = (const unsigned char *)p;

	return ((unsigned int)b[0] | ((unsigned int)b[1] << 8));
}

/* Decode 4 bytes as an unsigned little-endian value. */
static unsigned int
u4(const char *p)
{
	return (u2(p) | (u2(p + 2) << 16));
}

/*
 * Decode 8 bytes as an unsigned little-endian 64-bit value.
 *
 * Both halves are widened to uint64_t before combining, so a high word
 * of 0x80000000 or more cannot overflow a signed intermediate.
 */
static uint64_t
u8(const char *p)
{
	return ((uint64_t)u4(p) | ((uint64_t)u4(p + 4) << 32));
}
/*
 * The extra data is stored as a list of
 *	id1+size1+data1 + id2+size2+data2 ...
 * triplets.  id and size are 2 bytes each.
 */
714 process_extra(const void* extra
, struct zip
* zip
)
717 const char *p
= extra
;
718 while (offset
< zip
->extra_length
- 4)
720 unsigned short headerid
= u2(p
+ offset
);
721 unsigned short datasize
= u2(p
+ offset
+ 2);
723 if (offset
+ datasize
> zip
->extra_length
)
726 fprintf(stderr
, "Header id 0x%04x, length %d\n",
731 /* Zip64 extended information extra field. */
733 zip
->uncompressed_size
= u8(p
+ offset
);
735 zip
->compressed_size
= u8(p
+ offset
+ 8);
739 /* Extended time field "UT". */
740 int flags
= p
[offset
];
743 /* Flag bits indicate which dates are present. */
747 fprintf(stderr
, "mtime: %d -> %d\n",
748 zip
->mtime
, i4(p
+ offset
));
752 zip
->mtime
= i4(p
+ offset
);
760 zip
->atime
= i4(p
+ offset
);
768 zip
->ctime
= i4(p
+ offset
);
775 /* Info-ZIP Unix Extra Field (type 2) "Ux". */
777 fprintf(stderr
, "uid %d gid %d\n",
778 i2(p
+ offset
), i2(p
+ offset
+ 2));
781 zip
->uid
= i2(p
+ offset
);
783 zip
->gid
= i2(p
+ offset
+ 2);
791 if (offset
!= zip
->extra_length
)
794 "Extra data field contents do not match reported size!");