2 * Copyright (c) 2004 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
28 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_zip.c,v 1.5 2005/04/06 04:19:30 kientzle Exp $");
40 #include "archive_entry.h"
41 #include "archive_private.h"
44 /* entry_bytes_remaining is the number of bytes we expect. */
45 off_t entry_bytes_remaining
;
48 /* These count the number of bytes actually read for the entry. */
49 off_t entry_compressed_bytes_read
;
50 off_t entry_uncompressed_bytes_read
;
56 const char * compression_name
;
64 /* Flags to mark progress of decompression. */
67 char end_of_entry_cleanup
;
70 ssize_t filename_length
;
72 off_t uncompressed_size
;
73 off_t compressed_size
;
75 unsigned char *uncompressed_buffer
;
76 size_t uncompressed_buffer_size
;
81 struct archive_string pathname
;
82 struct archive_string extra
;
86 #define ZIP_LENGTH_AT_END 8
88 struct zip_file_header
{
95 char compressed_size
[4];
96 char uncompressed_size
[4];
97 char filename_length
[2];
101 const char *compression_names
[] = {
113 static int archive_read_format_zip_bid(struct archive
*);
114 static int archive_read_format_zip_cleanup(struct archive
*);
115 static int archive_read_format_zip_read_data(struct archive
*,
116 const void **, size_t *, off_t
*);
117 static int archive_read_format_zip_read_data_skip(struct archive
*a
);
118 static int archive_read_format_zip_read_header(struct archive
*,
119 struct archive_entry
*);
120 static int i2(const char *);
121 static int i4(const char *);
122 static unsigned int u2(const char *);
123 static unsigned int u4(const char *);
124 static uint64_t u8(const char *);
125 static int zip_read_data_deflate(struct archive
*a
, const void **buff
,
126 size_t *size
, off_t
*offset
);
127 static int zip_read_data_none(struct archive
*a
, const void **buff
,
128 size_t *size
, off_t
*offset
);
129 static int zip_read_file_header(struct archive
*a
,
130 struct archive_entry
*entry
, struct zip
*zip
);
131 static time_t zip_time(const char *);
132 static void process_extra(const void* extra
, struct zip
* zip
);
135 archive_read_support_format_zip(struct archive
*a
)
140 zip
= malloc(sizeof(*zip
));
141 memset(zip
, 0, sizeof(*zip
));
143 r
= __archive_read_register_format(a
,
145 archive_read_format_zip_bid
,
146 archive_read_format_zip_read_header
,
147 archive_read_format_zip_read_data
,
148 archive_read_format_zip_read_data_skip
,
149 archive_read_format_zip_cleanup
);
158 archive_read_format_zip_bid(struct archive
*a
)
165 if (a
->archive_format
== ARCHIVE_FORMAT_ZIP
)
168 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, 4);
173 if (p
[0] == 'P' && p
[1] == 'K') {
175 if (p
[2] == '\001' && p
[3] == '\002')
177 else if (p
[2] == '\003' && p
[3] == '\004')
179 else if (p
[2] == '\005' && p
[3] == '\006')
181 else if (p
[2] == '\007' && p
[3] == '\010')
188 archive_read_format_zip_read_header(struct archive
*a
,
189 struct archive_entry
*entry
)
193 const char *signature
;
196 a
->archive_format
= ARCHIVE_FORMAT_ZIP
;
197 if (a
->archive_format_name
== NULL
)
198 a
->archive_format_name
= "ZIP";
200 zip
= *(a
->pformat_data
);
201 zip
->decompress_init
= 0;
202 zip
->end_of_entry
= 0;
203 zip
->end_of_entry_cleanup
= 0;
204 zip
->entry_uncompressed_bytes_read
= 0;
205 zip
->entry_compressed_bytes_read
= 0;
206 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, 4);
208 return (ARCHIVE_FATAL
);
211 if (signature
[0] != 'P' || signature
[1] != 'K') {
212 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
214 return (ARCHIVE_FATAL
);
217 if (signature
[2] == '\001' && signature
[3] == '\002') {
218 /* Beginning of central directory. */
219 return (ARCHIVE_EOF
);
222 if (signature
[2] == '\003' && signature
[3] == '\004') {
223 /* Regular file entry. */
224 return (zip_read_file_header(a
, entry
, zip
));
227 if (signature
[2] == '\005' && signature
[3] == '\006') {
228 /* End-of-archive record. */
229 return (ARCHIVE_EOF
);
232 if (signature
[2] == '\007' && signature
[3] == '\010') {
234 * We should never encounter this record here;
235 * see ZIP_LENGTH_AT_END handling below for details.
237 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
238 "Bad ZIP file: Unexpected end-of-entry record");
239 return (ARCHIVE_FATAL
);
242 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
243 "Damaged ZIP file or unsupported format variant (%d,%d)",
244 signature
[2], signature
[3]);
245 return (ARCHIVE_FATAL
);
249 zip_read_file_header(struct archive
*a
, struct archive_entry
*entry
,
252 const struct zip_file_header
*p
;
258 (a
->compression_read_ahead
)(a
, &h
, sizeof(struct zip_file_header
));
259 if (bytes_read
< (int)sizeof(struct zip_file_header
)) {
260 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
261 "Truncated ZIP file header");
262 return (ARCHIVE_FATAL
);
266 zip
->version
= p
->version
[0];
267 zip
->system
= p
->version
[1];
268 zip
->flags
= i2(p
->flags
);
269 zip
->compression
= i2(p
->compression
);
270 if (zip
->compression
<
271 sizeof(compression_names
)/sizeof(compression_names
[0]))
272 zip
->compression_name
= compression_names
[zip
->compression
];
274 zip
->compression_name
= "??";
275 zip
->mtime
= zip_time(p
->timedate
);
281 zip
->crc32
= i4(p
->crc32
);
282 zip
->filename_length
= i2(p
->filename_length
);
283 zip
->extra_length
= i2(p
->extra_length
);
284 zip
->uncompressed_size
= u4(p
->uncompressed_size
);
285 zip
->compressed_size
= u4(p
->compressed_size
);
287 (a
->compression_read_consume
)(a
, sizeof(struct zip_file_header
));
290 /* Read the filename. */
291 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, zip
->filename_length
);
292 if (bytes_read
< zip
->filename_length
) {
293 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
294 "Truncated ZIP file header");
295 return (ARCHIVE_FATAL
);
297 archive_string_ensure(&zip
->pathname
, zip
->filename_length
);
298 archive_strncpy(&zip
->pathname
, h
, zip
->filename_length
);
299 (a
->compression_read_consume
)(a
, zip
->filename_length
);
300 archive_entry_set_pathname(entry
, zip
->pathname
.s
);
302 if (zip
->pathname
.s
[archive_strlen(&zip
->pathname
) - 1] == '/')
303 zip
->mode
= S_IFDIR
| 0777;
305 zip
->mode
= S_IFREG
| 0777;
307 /* Read the extra data. */
308 bytes_read
= (a
->compression_read_ahead
)(a
, &h
, zip
->extra_length
);
309 if (bytes_read
< zip
->extra_length
) {
310 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
311 "Truncated ZIP file header");
312 return (ARCHIVE_FATAL
);
314 process_extra(h
, zip
);
315 (a
->compression_read_consume
)(a
, zip
->extra_length
);
317 /* Populate some additional entry fields: */
318 memset(&st
, 0, sizeof(st
));
319 st
.st_mode
= zip
->mode
;
320 st
.st_uid
= zip
->uid
;
321 st
.st_gid
= zip
->gid
;
322 st
.st_mtime
= zip
->mtime
;
323 st
.st_ctime
= zip
->ctime
;
324 st
.st_atime
= zip
->atime
;
325 st
.st_size
= zip
->uncompressed_size
;
326 archive_entry_copy_stat(entry
, &st
);
328 zip
->entry_bytes_remaining
= zip
->compressed_size
;
329 zip
->entry_offset
= 0;
331 /* Set up a more descriptive format name. */
332 sprintf(zip
->format_name
, "ZIP %d.%d (%s)",
333 zip
->version
/ 10, zip
->version
% 10,
334 zip
->compression_name
);
335 a
->archive_format_name
= zip
->format_name
;
/*
 * Convert an MSDOS-style date/time into Unix-style time.
 *
 * p points at four bytes: a little-endian 16-bit time word followed by
 * a little-endian 16-bit date word.
 *
 *   time word: bits 15-11 hour, bits 10-5 minute, bits 4-0 seconds / 2
 *   date word: bits 15-9 years since 1980, bits 8-5 month (1-based),
 *              bits 4-0 day of month
 *
 * The broken-down fields are handed to mktime(), so the value is
 * interpreted in the local time zone.
 */
static time_t
zip_time(const char *p)
{
	const unsigned int dos_time =
	    ((unsigned)p[0] & 0xff) + 256 * ((unsigned)p[1] & 0xff);
	const unsigned int dos_date =
	    ((unsigned)p[2] & 0xff) + 256 * ((unsigned)p[3] & 0xff);
	struct tm when;

	memset(&when, 0, sizeof(when));

	/* Time of day; seconds are stored with two-second resolution. */
	when.tm_sec = (int)((dos_time << 1) & 0x3e);
	when.tm_min = (int)((dos_time >> 5) & 0x3f);
	when.tm_hour = (int)((dos_time >> 11) & 0x1f);

	/* Calendar date; struct tm counts years from 1900, months from 0. */
	when.tm_mday = (int)(dos_date & 0x1f);
	when.tm_mon = (int)((dos_date >> 5) & 0x0f) - 1;
	when.tm_year = (int)((dos_date >> 9) & 0x7f) + 80;

	/* Let mktime() work out whether DST applies. */
	when.tm_isdst = -1;
	return (mktime(&when));
}
362 archive_read_format_zip_read_data(struct archive
*a
,
363 const void **buff
, size_t *size
, off_t
*offset
)
368 zip
= *(a
->pformat_data
);
371 * If we hit end-of-entry last time, clean up and return
372 * ARCHIVE_EOF this time.
374 if (zip
->end_of_entry
) {
375 if (!zip
->end_of_entry_cleanup
) {
376 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
380 (a
->compression_read_ahead
)(a
, &h
, 16);
381 if (bytes_read
< 16) {
383 ARCHIVE_ERRNO_FILE_FORMAT
,
384 "Truncated ZIP end-of-file record");
385 return (ARCHIVE_FATAL
);
388 zip
->crc32
= i4(p
+ 4);
389 zip
->compressed_size
= u4(p
+ 8);
390 zip
->uncompressed_size
= u4(p
+ 12);
391 bytes_read
= (a
->compression_read_consume
)(a
, 16);
394 /* Check file size, CRC against these values. */
395 if (zip
->compressed_size
!= zip
->entry_compressed_bytes_read
) {
396 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
397 "ZIP compressed data is wrong size");
398 return (ARCHIVE_WARN
);
400 if (zip
->uncompressed_size
!= zip
->entry_uncompressed_bytes_read
) {
401 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
402 "ZIP uncompressed data is wrong size");
403 return (ARCHIVE_WARN
);
405 /* TODO: Compute CRC. */
407 if (zip->crc32 != zip->entry_crc32_calculated) {
408 archive_set_error(a, ARCHIVE_ERRNO_MISC,
409 "ZIP data CRC error");
410 return (ARCHIVE_WARN);
413 /* End-of-entry cleanup done. */
414 zip
->end_of_entry_cleanup
= 1;
416 return (ARCHIVE_EOF
);
419 switch(zip
->compression
) {
420 case 0: /* No compression. */
421 r
= zip_read_data_none(a
, buff
, size
, offset
);
423 case 8: /* Deflate compression. */
424 r
= zip_read_data_deflate(a
, buff
, size
, offset
);
426 default: /* Unsupported compression. */
430 /* Return a warning. */
431 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
432 "Unsupported ZIP compression method (%s)",
433 zip
->compression_name
);
434 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
436 * ZIP_LENGTH_AT_END requires us to
437 * decompress the entry in order to
438 * skip it, but we don't know this
439 * compression method, so we give up.
443 /* We know compressed size; just skip it. */
444 archive_read_format_zip_read_data_skip(a
);
/*
 * Read "uncompressed" data.  According to the current specification,
 * if ZIP_LENGTH_AT_END is specified, then the size fields in the
 * initial file header are supposed to be set to zero.  This would, of
 * course, make it impossible for us to read the archive, since we
 * couldn't determine the end of the file data.  Info-ZIP seems to
 * include the real size fields both before and after the data in this
 * case (the CRC only appears afterwards), so this works as you would
 * expect.
 *
 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
 * zip->end_of_entry if it consumes all of the data.
 */
466 zip_read_data_none(struct archive
*a
, const void **buff
,
467 size_t *size
, off_t
*offset
)
472 zip
= *(a
->pformat_data
);
474 if (zip
->entry_bytes_remaining
== 0) {
477 *offset
= zip
->entry_offset
;
478 zip
->end_of_entry
= 1;
482 * Note: '1' here is a performance optimization.
483 * Recall that the decompression layer returns a count of
484 * available bytes; asking for more than that forces the
485 * decompressor to combine reads by copying data.
487 bytes_avail
= (a
->compression_read_ahead
)(a
, buff
, 1);
488 if (bytes_avail
<= 0) {
489 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
490 "Truncated ZIP file data");
491 return (ARCHIVE_FATAL
);
493 if (bytes_avail
> zip
->entry_bytes_remaining
)
494 bytes_avail
= zip
->entry_bytes_remaining
;
495 (a
->compression_read_consume
)(a
, bytes_avail
);
497 *offset
= zip
->entry_offset
;
498 zip
->entry_offset
+= *size
;
499 zip
->entry_bytes_remaining
-= *size
;
500 zip
->entry_uncompressed_bytes_read
+= *size
;
501 zip
->entry_compressed_bytes_read
+= *size
;
507 zip_read_data_deflate(struct archive
*a
, const void **buff
,
508 size_t *size
, off_t
*offset
)
512 const void *compressed_buff
;
515 zip
= *(a
->pformat_data
);
517 /* If the buffer hasn't been allocated, allocate it now. */
518 if (zip
->uncompressed_buffer
== NULL
) {
519 zip
->uncompressed_buffer_size
= 32 * 1024;
520 zip
->uncompressed_buffer
521 = malloc(zip
->uncompressed_buffer_size
);
522 if (zip
->uncompressed_buffer
== NULL
) {
523 archive_set_error(a
, ENOMEM
,
524 "No memory for ZIP decompression");
525 return (ARCHIVE_FATAL
);
529 /* If we haven't yet read any data, initialize the decompressor. */
530 if (!zip
->decompress_init
) {
531 r
= inflateInit2(&zip
->stream
,
532 -15 /* Don't check for zlib header */);
534 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
535 "Can't initialize ZIP decompression.");
536 return (ARCHIVE_FATAL
);
538 zip
->decompress_init
= 1;
542 * Note: '1' here is a performance optimization.
543 * Recall that the decompression layer returns a count of
544 * available bytes; asking for more than that forces the
545 * decompressor to combine reads by copying data.
547 bytes_avail
= (a
->compression_read_ahead
)(a
, &compressed_buff
, 1);
548 if (bytes_avail
<= 0) {
549 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
550 "Truncated ZIP file body");
551 return (ARCHIVE_FATAL
);
555 * A bug in zlib.h: stream.next_in should be marked 'const'
556 * but isn't (the library never alters data through the
557 * next_in pointer, only reads it). The result: this ugly
558 * cast to remove 'const'.
560 zip
->stream
.next_in
= (void *)(uintptr_t)(const void *)compressed_buff
;
561 zip
->stream
.avail_in
= bytes_avail
;
562 zip
->stream
.total_in
= 0;
563 zip
->stream
.next_out
= zip
->uncompressed_buffer
;
564 zip
->stream
.avail_out
= zip
->uncompressed_buffer_size
;
565 zip
->stream
.total_out
= 0;
567 r
= inflate(&zip
->stream
, 0);
572 zip
->end_of_entry
= 1;
575 archive_set_error(a
, ENOMEM
,
576 "Out of memory for ZIP decompression");
577 return (ARCHIVE_FATAL
);
579 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
580 "ZIP decompression failed (%d)", r
);
581 return (ARCHIVE_FATAL
);
584 /* Consume as much as the compressor actually used. */
585 bytes_avail
= zip
->stream
.total_in
;
586 (a
->compression_read_consume
)(a
, bytes_avail
);
587 zip
->entry_bytes_remaining
-= bytes_avail
;
588 zip
->entry_compressed_bytes_read
+= bytes_avail
;
590 *offset
= zip
->entry_offset
;
591 *size
= zip
->stream
.total_out
;
592 zip
->entry_uncompressed_bytes_read
+= *size
;
593 *buff
= zip
->uncompressed_buffer
;
594 zip
->entry_offset
+= *size
;
599 zip_read_data_deflate(struct archive
*a
, const void **buff
,
600 size_t *size
, off_t
*offset
)
607 archive_set_error(a
, ARCHIVE_ERRNO_MISC
,
608 "libarchive compiled without deflate support (no libz)");
609 return (ARCHIVE_FATAL
);
614 archive_read_format_zip_read_data_skip(struct archive
*a
)
617 const void *buff
= NULL
;
620 zip
= *(a
->pformat_data
);
623 * If the length is at the end, we have no choice but
624 * to decompress all the data to find the end marker.
626 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
631 r
= archive_read_format_zip_read_data(a
, &buff
,
633 } while (r
== ARCHIVE_OK
);
638 * If the length is at the beginning, we can skip the
639 * compressed data much more quickly.
641 while (zip
->entry_bytes_remaining
> 0) {
642 bytes_avail
= (a
->compression_read_ahead
)(a
, &buff
, 1);
643 if (bytes_avail
<= 0) {
644 archive_set_error(a
, ARCHIVE_ERRNO_FILE_FORMAT
,
645 "Truncated ZIP file body");
646 return (ARCHIVE_FATAL
);
648 if (bytes_avail
> zip
->entry_bytes_remaining
)
649 bytes_avail
= zip
->entry_bytes_remaining
;
650 (a
->compression_read_consume
)(a
, bytes_avail
);
651 zip
->entry_bytes_remaining
-= bytes_avail
;
653 /* This entry is finished and done. */
654 zip
->end_of_entry_cleanup
= zip
->end_of_entry
= 1;
659 archive_read_format_zip_cleanup(struct archive
*a
)
663 zip
= *(a
->pformat_data
);
664 if (zip
->uncompressed_buffer
!= NULL
)
665 free(zip
->uncompressed_buffer
);
666 archive_string_free(&(zip
->pathname
));
667 archive_string_free(&(zip
->extra
));
669 *(a
->pformat_data
) = NULL
;
/*
 * Decode a 16-bit little-endian integer from the two bytes at p.
 *
 * Each byte is masked to 0..255 before use, so the result is always in
 * the range 0..65535 whether or not plain char is signed.
 */
static int
i2(const char *p)
{
	const int low_byte = (int)p[0] & 0xff;
	const int high_byte = (int)p[1] & 0xff;

	return (low_byte + high_byte * 256);
}
/*
 * Decode a 32-bit little-endian integer from the four bytes at p and
 * return it as a signed int (two's-complement interpretation: a set top
 * bit yields a negative result).
 *
 * The value is assembled in an unsigned 32-bit accumulator and only then
 * mapped into the signed range.  Building it with signed arithmetic
 * (0x10000 * high_half) overflows 'int' whenever the top bit is set,
 * which is undefined behaviour.
 */
static int
i4(const char *p)
{
	uint32_t bits = 0;
	int i;

	/* Most significant byte is last in the buffer. */
	for (i = 3; i >= 0; i--)
		bits = (bits << 8) | (uint32_t)((unsigned char)p[i]);

	if (bits <= 0x7fffffffU)
		return ((int)bits);
	/* Top bit set: subtract 2^32 in a wider type; the result fits. */
	return ((int)((int64_t)bits - 4294967296LL));
}
/*
 * Decode an unsigned 16-bit little-endian integer from the two bytes
 * at p.  Bytes are masked to 0..255, so sign extension of plain char
 * cannot leak into the result.
 */
static unsigned int
u2(const char *p)
{
	const unsigned int low_byte = (unsigned int)p[0] & 0xff;
	const unsigned int high_byte = (unsigned int)p[1] & 0xff;

	return (low_byte + high_byte * 256);
}
/*
 * Decode an unsigned 32-bit little-endian integer from the four bytes
 * at p: the low 16-bit half comes first, the high half second.
 */
static unsigned int
u4(const char *p)
{
	/* Each half is a little-endian 16-bit quantity. */
	const unsigned int low_half =
	    ((unsigned int)p[0] & 0xff) + 256 * ((unsigned int)p[1] & 0xff);
	const unsigned int high_half =
	    ((unsigned int)p[2] & 0xff) + 256 * ((unsigned int)p[3] & 0xff);

	return (low_half + 0x10000 * high_half);
}
/*
 * Decode an unsigned 64-bit little-endian integer from the eight bytes
 * at p (used for the Zip64 size fields in the extra data).
 *
 * All arithmetic is done in uint64_t.  Multiplying the high word by the
 * signed constant 0x100000000LL overflows 'long long' whenever the high
 * word is 0x80000000 or larger, which is undefined behaviour.
 */
static uint64_t
u8(const char *p)
{
	uint64_t value = 0;
	int i;

	/* Most significant byte is last in the buffer. */
	for (i = 7; i >= 0; i--)
		value = (value << 8) | (uint64_t)((unsigned char)p[i]);
	return (value);
}
/*
 * The extra data is stored as a list of
 *	id1+size1+data1 + id2+size2+data2 ...
 * triplets.  id and size are 2 bytes each.
 */
710 process_extra(const void* extra
, struct zip
* zip
)
713 const char *p
= extra
;
714 while (offset
< zip
->extra_length
- 4)
716 unsigned short headerid
= u2(p
+ offset
);
717 unsigned short datasize
= u2(p
+ offset
+ 2);
719 if (offset
+ datasize
> zip
->extra_length
)
722 fprintf(stderr
, "Header id 0x%04x, length %d\n",
727 /* Zip64 extended information extra field. */
729 zip
->uncompressed_size
= u8(p
+ offset
);
731 zip
->compressed_size
= u8(p
+ offset
+ 8);
735 /* Extended time field "UT". */
736 int flags
= p
[offset
];
739 /* Flag bits indicate which dates are present. */
743 fprintf(stderr
, "mtime: %d -> %d\n",
744 zip
->mtime
, i4(p
+ offset
));
748 zip
->mtime
= i4(p
+ offset
);
756 zip
->atime
= i4(p
+ offset
);
764 zip
->ctime
= i4(p
+ offset
);
771 /* Info-ZIP Unix Extra Field (type 2) "Ux". */
773 fprintf(stderr
, "uid %d gid %d\n",
774 i2(p
+ offset
), i2(p
+ offset
+ 2));
777 zip
->uid
= i2(p
+ offset
);
779 zip
->gid
= i2(p
+ offset
+ 2);
787 if (offset
!= zip
->extra_length
)
790 "Extra data field contents do not match reported size!");