2 * Copyright (c) 2004 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_zip.c,v 1.12 2007/04/15 00:53:38 kientzle Exp $");
42 #include "archive_entry.h"
43 #include "archive_private.h"
44 #include "archive_read_private.h"
47 /* entry_bytes_remaining is the number of bytes we expect. */
48 int64_t entry_bytes_remaining
;
51 /* These count the number of bytes actually read for the entry. */
52 int64_t entry_compressed_bytes_read
;
53 int64_t entry_uncompressed_bytes_read
;
59 const char * compression_name
;
67 /* Flags to mark progress of decompression. */
70 char end_of_entry_cleanup
;
73 ssize_t filename_length
;
75 int64_t uncompressed_size
;
76 int64_t compressed_size
;
78 unsigned char *uncompressed_buffer
;
79 size_t uncompressed_buffer_size
;
85 struct archive_string pathname
;
86 struct archive_string extra
;
90 #define ZIP_LENGTH_AT_END 8
92 struct zip_file_header
{
99 char compressed_size
[4];
100 char uncompressed_size
[4];
101 char filename_length
[2];
102 char extra_length
[2];
105 static const char *compression_names
[] = {
/*
 * Forward declarations for the file-local routines.
 */

/* Callbacks passed to __archive_read_register_format(). */
static int	archive_read_format_zip_bid(struct archive_read *a);
static int	archive_read_format_zip_read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	archive_read_format_zip_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_zip_read_data_skip(struct archive_read *a);
static int	archive_read_format_zip_cleanup(struct archive_read *a);

/* Per-entry header parsing and body readers. */
static int	zip_read_file_header(struct archive_read *a,
		    struct archive_entry *entry, struct zip *zip);
static int	zip_read_data_none(struct archive_read *a, const void **buff,
		    size_t *size, off_t *offset);
static int	zip_read_data_deflate(struct archive_read *a, const void **buff,
		    size_t *size, off_t *offset);
static void	process_extra(const void *extra, struct zip *zip);

/* Conversion helpers: MS-DOS timestamp and little-endian integers. */
static time_t	zip_time(const char *p);
static int	i2(const char *p);
static int	i4(const char *p);
static unsigned int	u2(const char *p);
static unsigned int	u4(const char *p);
static uint64_t	u8(const char *p);
139 archive_read_support_format_zip(struct archive
*_a
)
141 struct archive_read
*a
= (struct archive_read
*)_a
;
145 zip
= (struct zip
*)malloc(sizeof(*zip
));
147 archive_set_error(&a
->archive
, ENOMEM
, "Can't allocate zip data");
148 return (ARCHIVE_FATAL
);
150 memset(zip
, 0, sizeof(*zip
));
152 r
= __archive_read_register_format(a
,
154 archive_read_format_zip_bid
,
155 archive_read_format_zip_read_header
,
156 archive_read_format_zip_read_data
,
157 archive_read_format_zip_read_data_skip
,
158 archive_read_format_zip_cleanup
);
167 archive_read_format_zip_bid(struct archive_read
*a
)
174 if (a
->archive
.archive_format
== ARCHIVE_FORMAT_ZIP
)
177 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, 4);
182 if (p
[0] == 'P' && p
[1] == 'K') {
184 if (p
[2] == '\001' && p
[3] == '\002')
186 else if (p
[2] == '\003' && p
[3] == '\004')
188 else if (p
[2] == '\005' && p
[3] == '\006')
190 else if (p
[2] == '\007' && p
[3] == '\010')
197 archive_read_format_zip_read_header(struct archive_read
*a
,
198 struct archive_entry
*entry
)
202 const char *signature
;
205 a
->archive
.archive_format
= ARCHIVE_FORMAT_ZIP
;
206 if (a
->archive
.archive_format_name
== NULL
)
207 a
->archive
.archive_format_name
= "ZIP";
209 zip
= (struct zip
*)(a
->format
->data
);
210 zip
->decompress_init
= 0;
211 zip
->end_of_entry
= 0;
212 zip
->end_of_entry_cleanup
= 0;
213 zip
->entry_uncompressed_bytes_read
= 0;
214 zip
->entry_compressed_bytes_read
= 0;
215 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, 4);
217 return (ARCHIVE_FATAL
);
219 signature
= (const char *)h
;
220 if (signature
[0] != 'P' || signature
[1] != 'K') {
221 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
223 return (ARCHIVE_FATAL
);
226 if (signature
[2] == '\001' && signature
[3] == '\002') {
227 /* Beginning of central directory. */
228 return (ARCHIVE_EOF
);
231 if (signature
[2] == '\003' && signature
[3] == '\004') {
232 /* Regular file entry. */
233 return (zip_read_file_header(a
, entry
, zip
));
236 if (signature
[2] == '\005' && signature
[3] == '\006') {
237 /* End-of-archive record. */
238 return (ARCHIVE_EOF
);
241 if (signature
[2] == '\007' && signature
[3] == '\010') {
243 * We should never encounter this record here;
244 * see ZIP_LENGTH_AT_END handling below for details.
246 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
247 "Bad ZIP file: Unexpected end-of-entry record");
248 return (ARCHIVE_FATAL
);
251 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
252 "Damaged ZIP file or unsupported format variant (%d,%d)",
253 signature
[2], signature
[3]);
254 return (ARCHIVE_FATAL
);
258 zip_read_file_header(struct archive_read
*a
, struct archive_entry
*entry
,
261 const struct zip_file_header
*p
;
266 (a
->decompressor
->read_ahead
)(a
, &h
, sizeof(struct zip_file_header
));
267 if (bytes_read
< (int)sizeof(struct zip_file_header
)) {
268 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
269 "Truncated ZIP file header");
270 return (ARCHIVE_FATAL
);
272 p
= (const struct zip_file_header
*)h
;
274 zip
->version
= p
->version
[0];
275 zip
->system
= p
->version
[1];
276 zip
->flags
= i2(p
->flags
);
277 zip
->compression
= i2(p
->compression
);
278 if (zip
->compression
<
279 sizeof(compression_names
)/sizeof(compression_names
[0]))
280 zip
->compression_name
= compression_names
[zip
->compression
];
282 zip
->compression_name
= "??";
283 zip
->mtime
= zip_time(p
->timedate
);
289 zip
->crc32
= i4(p
->crc32
);
290 zip
->filename_length
= i2(p
->filename_length
);
291 zip
->extra_length
= i2(p
->extra_length
);
292 zip
->uncompressed_size
= u4(p
->uncompressed_size
);
293 zip
->compressed_size
= u4(p
->compressed_size
);
295 (a
->decompressor
->consume
)(a
, sizeof(struct zip_file_header
));
298 /* Read the filename. */
299 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, zip
->filename_length
);
300 if (bytes_read
< zip
->filename_length
) {
301 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
302 "Truncated ZIP file header");
303 return (ARCHIVE_FATAL
);
305 archive_string_ensure(&zip
->pathname
, zip
->filename_length
);
306 archive_strncpy(&zip
->pathname
, (const char *)h
, zip
->filename_length
);
307 (a
->decompressor
->consume
)(a
, zip
->filename_length
);
308 archive_entry_set_pathname(entry
, zip
->pathname
.s
);
310 if (zip
->pathname
.s
[archive_strlen(&zip
->pathname
) - 1] == '/')
311 zip
->mode
= AE_IFDIR
| 0777;
313 zip
->mode
= AE_IFREG
| 0777;
315 /* Read the extra data. */
316 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, zip
->extra_length
);
317 if (bytes_read
< zip
->extra_length
) {
318 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
319 "Truncated ZIP file header");
320 return (ARCHIVE_FATAL
);
322 process_extra(h
, zip
);
323 (a
->decompressor
->consume
)(a
, zip
->extra_length
);
325 /* Populate some additional entry fields: */
326 archive_entry_set_mode(entry
, zip
->mode
);
327 archive_entry_set_uid(entry
, zip
->uid
);
328 archive_entry_set_gid(entry
, zip
->gid
);
329 archive_entry_set_mtime(entry
, zip
->mtime
, 0);
330 archive_entry_set_ctime(entry
, zip
->ctime
, 0);
331 archive_entry_set_atime(entry
, zip
->atime
, 0);
332 archive_entry_set_size(entry
, zip
->uncompressed_size
);
334 zip
->entry_bytes_remaining
= zip
->compressed_size
;
335 zip
->entry_offset
= 0;
337 /* Set up a more descriptive format name. */
338 sprintf(zip
->format_name
, "ZIP %d.%d (%s)",
339 zip
->version
/ 10, zip
->version
% 10,
340 zip
->compression_name
);
341 a
->archive
.archive_format_name
= zip
->format_name
;
346 /* Convert an MSDOS-style date/time into Unix-style time. */
348 zip_time(const char *p
)
353 msTime
= (0xff & (unsigned)p
[0]) + 256 * (0xff & (unsigned)p
[1]);
354 msDate
= (0xff & (unsigned)p
[2]) + 256 * (0xff & (unsigned)p
[3]);
356 memset(&ts
, 0, sizeof(ts
));
357 ts
.tm_year
= ((msDate
>> 9) & 0x7f) + 80; /* Years since 1900. */
358 ts
.tm_mon
= ((msDate
>> 5) & 0x0f) - 1; /* Month number. */
359 ts
.tm_mday
= msDate
& 0x1f; /* Day of month. */
360 ts
.tm_hour
= (msTime
>> 11) & 0x1f;
361 ts
.tm_min
= (msTime
>> 5) & 0x3f;
362 ts
.tm_sec
= (msTime
<< 1) & 0x3e;
368 archive_read_format_zip_read_data(struct archive_read
*a
,
369 const void **buff
, size_t *size
, off_t
*offset
)
374 zip
= (struct zip
*)(a
->format
->data
);
377 * If we hit end-of-entry last time, clean up and return
378 * ARCHIVE_EOF this time.
380 if (zip
->end_of_entry
) {
381 if (!zip
->end_of_entry_cleanup
) {
382 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
386 (a
->decompressor
->read_ahead
)(a
, &h
, 16);
387 if (bytes_read
< 16) {
388 archive_set_error(&a
->archive
,
389 ARCHIVE_ERRNO_FILE_FORMAT
,
390 "Truncated ZIP end-of-file record");
391 return (ARCHIVE_FATAL
);
394 zip
->crc32
= i4(p
+ 4);
395 zip
->compressed_size
= u4(p
+ 8);
396 zip
->uncompressed_size
= u4(p
+ 12);
397 bytes_read
= (a
->decompressor
->consume
)(a
, 16);
400 /* Check file size, CRC against these values. */
401 if (zip
->compressed_size
!= zip
->entry_compressed_bytes_read
) {
402 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
403 "ZIP compressed data is wrong size");
404 return (ARCHIVE_WARN
);
406 /* Size field only stores the lower 32 bits of the actual size. */
407 if ((zip
->uncompressed_size
& UINT32_MAX
)
408 != (zip
->entry_uncompressed_bytes_read
& UINT32_MAX
)) {
409 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
410 "ZIP uncompressed data is wrong size");
411 return (ARCHIVE_WARN
);
413 /* TODO: Compute CRC. */
415 if (zip->crc32 != zip->entry_crc32_calculated) {
416 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
417 "ZIP data CRC error");
418 return (ARCHIVE_WARN);
421 /* End-of-entry cleanup done. */
422 zip
->end_of_entry_cleanup
= 1;
424 return (ARCHIVE_EOF
);
427 switch(zip
->compression
) {
428 case 0: /* No compression. */
429 r
= zip_read_data_none(a
, buff
, size
, offset
);
431 case 8: /* Deflate compression. */
432 r
= zip_read_data_deflate(a
, buff
, size
, offset
);
434 default: /* Unsupported compression. */
438 /* Return a warning. */
439 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
440 "Unsupported ZIP compression method (%s)",
441 zip
->compression_name
);
442 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
444 * ZIP_LENGTH_AT_END requires us to
445 * decompress the entry in order to
446 * skip it, but we don't know this
447 * compression method, so we give up.
451 /* We know compressed size; just skip it. */
452 archive_read_format_zip_read_data_skip(a
);
461 * Read "uncompressed" data. According to the current specification,
462 * if ZIP_LENGTH_AT_END is specified, then the size fields in the
463 * initial file header are supposed to be set to zero. This would, of
464 * course, make it impossible for us to read the archive, since we
465 * couldn't determine the end of the file data. Info-ZIP seems to
466 * include the real size fields both before and after the data in this
467 * case (the CRC only appears afterwards), so this works as you would
470 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
471 * zip->end_of_entry if it consumes all of the data.
474 zip_read_data_none(struct archive_read
*a
, const void **buff
,
475 size_t *size
, off_t
*offset
)
480 zip
= (struct zip
*)(a
->format
->data
);
482 if (zip
->entry_bytes_remaining
== 0) {
485 *offset
= zip
->entry_offset
;
486 zip
->end_of_entry
= 1;
490 * Note: '1' here is a performance optimization.
491 * Recall that the decompression layer returns a count of
492 * available bytes; asking for more than that forces the
493 * decompressor to combine reads by copying data.
495 bytes_avail
= (a
->decompressor
->read_ahead
)(a
, buff
, 1);
496 if (bytes_avail
<= 0) {
497 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
498 "Truncated ZIP file data");
499 return (ARCHIVE_FATAL
);
501 if (bytes_avail
> zip
->entry_bytes_remaining
)
502 bytes_avail
= zip
->entry_bytes_remaining
;
503 (a
->decompressor
->consume
)(a
, bytes_avail
);
505 *offset
= zip
->entry_offset
;
506 zip
->entry_offset
+= *size
;
507 zip
->entry_bytes_remaining
-= *size
;
508 zip
->entry_uncompressed_bytes_read
+= *size
;
509 zip
->entry_compressed_bytes_read
+= *size
;
515 zip_read_data_deflate(struct archive_read
*a
, const void **buff
,
516 size_t *size
, off_t
*offset
)
520 const void *compressed_buff
;
523 zip
= (struct zip
*)(a
->format
->data
);
525 /* If the buffer hasn't been allocated, allocate it now. */
526 if (zip
->uncompressed_buffer
== NULL
) {
527 zip
->uncompressed_buffer_size
= 32 * 1024;
528 zip
->uncompressed_buffer
529 = (unsigned char *)malloc(zip
->uncompressed_buffer_size
);
530 if (zip
->uncompressed_buffer
== NULL
) {
531 archive_set_error(&a
->archive
, ENOMEM
,
532 "No memory for ZIP decompression");
533 return (ARCHIVE_FATAL
);
537 /* If we haven't yet read any data, initialize the decompressor. */
538 if (!zip
->decompress_init
) {
539 if (zip
->stream_valid
)
540 r
= inflateReset(&zip
->stream
);
542 r
= inflateInit2(&zip
->stream
,
543 -15 /* Don't check for zlib header */);
545 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
546 "Can't initialize ZIP decompression.");
547 return (ARCHIVE_FATAL
);
549 /* Stream structure has been set up. */
550 zip
->stream_valid
= 1;
551 /* We've initialized decompression for this stream. */
552 zip
->decompress_init
= 1;
556 * Note: '1' here is a performance optimization.
557 * Recall that the decompression layer returns a count of
558 * available bytes; asking for more than that forces the
559 * decompressor to combine reads by copying data.
561 bytes_avail
= (a
->decompressor
->read_ahead
)(a
, &compressed_buff
, 1);
562 if (bytes_avail
<= 0) {
563 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
564 "Truncated ZIP file body");
565 return (ARCHIVE_FATAL
);
569 * A bug in zlib.h: stream.next_in should be marked 'const'
570 * but isn't (the library never alters data through the
571 * next_in pointer, only reads it). The result: this ugly
572 * cast to remove 'const'.
574 zip
->stream
.next_in
= (Bytef
*)(uintptr_t)(const void *)compressed_buff
;
575 zip
->stream
.avail_in
= bytes_avail
;
576 zip
->stream
.total_in
= 0;
577 zip
->stream
.next_out
= zip
->uncompressed_buffer
;
578 zip
->stream
.avail_out
= zip
->uncompressed_buffer_size
;
579 zip
->stream
.total_out
= 0;
581 r
= inflate(&zip
->stream
, 0);
586 zip
->end_of_entry
= 1;
589 archive_set_error(&a
->archive
, ENOMEM
,
590 "Out of memory for ZIP decompression");
591 return (ARCHIVE_FATAL
);
593 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
594 "ZIP decompression failed (%d)", r
);
595 return (ARCHIVE_FATAL
);
598 /* Consume as much as the compressor actually used. */
599 bytes_avail
= zip
->stream
.total_in
;
600 (a
->decompressor
->consume
)(a
, bytes_avail
);
601 zip
->entry_bytes_remaining
-= bytes_avail
;
602 zip
->entry_compressed_bytes_read
+= bytes_avail
;
604 *offset
= zip
->entry_offset
;
605 *size
= zip
->stream
.total_out
;
606 zip
->entry_uncompressed_bytes_read
+= *size
;
607 *buff
= zip
->uncompressed_buffer
;
608 zip
->entry_offset
+= *size
;
613 zip_read_data_deflate(struct archive_read
*a
, const void **buff
,
614 size_t *size
, off_t
*offset
)
619 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
620 "libarchive compiled without deflate support (no libz)");
621 return (ARCHIVE_FATAL
);
626 archive_read_format_zip_read_data_skip(struct archive_read
*a
)
629 const void *buff
= NULL
;
632 zip
= (struct zip
*)(a
->format
->data
);
635 * If the length is at the end, we have no choice but
636 * to decompress all the data to find the end marker.
638 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
643 r
= archive_read_format_zip_read_data(a
, &buff
,
645 } while (r
== ARCHIVE_OK
);
650 * If the length is at the beginning, we can skip the
651 * compressed data much more quickly.
653 while (zip
->entry_bytes_remaining
> 0) {
654 bytes_avail
= (a
->decompressor
->read_ahead
)(a
, &buff
, 1);
655 if (bytes_avail
<= 0) {
656 archive_set_error(&a
->archive
,
657 ARCHIVE_ERRNO_FILE_FORMAT
,
658 "Truncated ZIP file body");
659 return (ARCHIVE_FATAL
);
661 if (bytes_avail
> zip
->entry_bytes_remaining
)
662 bytes_avail
= zip
->entry_bytes_remaining
;
663 (a
->decompressor
->consume
)(a
, bytes_avail
);
664 zip
->entry_bytes_remaining
-= bytes_avail
;
666 /* This entry is finished and done. */
667 zip
->end_of_entry_cleanup
= zip
->end_of_entry
= 1;
672 archive_read_format_zip_cleanup(struct archive_read
*a
)
676 zip
= (struct zip
*)(a
->format
->data
);
678 if (zip
->stream_valid
)
679 inflateEnd(&zip
->stream
);
681 free(zip
->uncompressed_buffer
);
682 archive_string_free(&(zip
->pathname
));
683 archive_string_free(&(zip
->extra
));
685 (a
->format
->data
) = NULL
;
/*
 * Decode a 2-byte little-endian field: p[0] is the low byte, p[1] the
 * high byte.  Each byte is masked with 0xff so the result is the same
 * whether plain 'char' is signed or unsigned.
 *
 * Returns a value in the range 0..65535.
 */
static int
i2(const char *p)
{
	return ((0xff & (int)p[0]) + 256 * (0xff & (int)p[1]));
}
/*
 * Decode a 4-byte little-endian field as a signed int.
 *
 * The bytes are assembled in an unsigned 32-bit accumulator so that a
 * set top bit cannot trigger signed-overflow undefined behavior (a
 * signed multiply of the high half by 0x10000 would).  The final
 * conversion to 'int' is implementation-defined for values above
 * INT_MAX; on two's-complement targets it yields the expected
 * negative value.
 */
static int
i4(const char *p)
{
	const unsigned char *b = (const unsigned char *)p;
	uint32_t v;

	v = (uint32_t)b[0]
	    | ((uint32_t)b[1] << 8)
	    | ((uint32_t)b[2] << 16)
	    | ((uint32_t)b[3] << 24);
	return ((int)v);
}
/*
 * Decode a 2-byte little-endian field as an unsigned value: p[0] is
 * the low byte, p[1] the high byte.  The 0xff masks discard any sign
 * extension introduced when plain 'char' is signed.
 */
static unsigned int
u2(const char *p)
{
	return ((0xff & (unsigned int)p[0]) + 256 * (0xff & (unsigned int)p[1]));
}
/*
 * Decode a 4-byte little-endian field as an unsigned value.
 * Bytes are read through an 'unsigned char' view so no sign extension
 * can leak into the result; all arithmetic is unsigned.
 */
static unsigned int
u4(const char *p)
{
	const unsigned char *b = (const unsigned char *)p;

	return ((unsigned int)b[0]
	    | ((unsigned int)b[1] << 8)
	    | ((unsigned int)b[2] << 16)
	    | ((unsigned int)b[3] << 24));
}
/*
 * Decode an 8-byte little-endian field as an unsigned 64-bit value.
 *
 * The value is built entirely in uint64_t.  Multiplying the upper
 * 32-bit word by a signed 'long long' constant would overflow (and is
 * undefined behavior) once that word reaches 0x80000000, so shifts on
 * an unsigned accumulator are used instead.
 */
static uint64_t
u8(const char *p)
{
	const unsigned char *b = (const unsigned char *)p;
	uint64_t v = 0;
	int i;

	/* Walk from the most significant byte (p[7]) down to p[0]. */
	for (i = 7; i >= 0; i--)
		v = (v << 8) | b[i];
	return (v);
}
721 * The extra data is stored as a list of
722 * id1+size1+data1 + id2+size2+data2 ...
723 * triplets. id and size are 2 bytes each.
726 process_extra(const void* extra
, struct zip
* zip
)
729 const char *p
= (const char *)extra
;
730 while (offset
< zip
->extra_length
- 4)
732 unsigned short headerid
= u2(p
+ offset
);
733 unsigned short datasize
= u2(p
+ offset
+ 2);
735 if (offset
+ datasize
> zip
->extra_length
)
738 fprintf(stderr
, "Header id 0x%04x, length %d\n",
743 /* Zip64 extended information extra field. */
745 zip
->uncompressed_size
= u8(p
+ offset
);
747 zip
->compressed_size
= u8(p
+ offset
+ 8);
751 /* Extended time field "UT". */
752 int flags
= p
[offset
];
755 /* Flag bits indicate which dates are present. */
759 fprintf(stderr
, "mtime: %d -> %d\n",
760 zip
->mtime
, i4(p
+ offset
));
764 zip
->mtime
= i4(p
+ offset
);
772 zip
->atime
= i4(p
+ offset
);
780 zip
->ctime
= i4(p
+ offset
);
787 /* Info-ZIP Unix Extra Field (type 2) "Ux". */
789 fprintf(stderr
, "uid %d gid %d\n",
790 i2(p
+ offset
), i2(p
+ offset
+ 2));
793 zip
->uid
= i2(p
+ offset
);
795 zip
->gid
= i2(p
+ offset
+ 2);
803 if (offset
!= zip
->extra_length
)
806 "Extra data field contents do not match reported size!");