2 * Copyright (c) 2004 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_zip.c,v 1.12 2007/04/15 00:53:38 kientzle Exp $");
42 #include "archive_entry.h"
43 #include "archive_private.h"
44 #include "archive_read_private.h"
47 /* entry_bytes_remaining is the number of bytes we expect. */
48 int64_t entry_bytes_remaining
;
51 /* These count the number of bytes actually read for the entry. */
52 int64_t entry_compressed_bytes_read
;
53 int64_t entry_uncompressed_bytes_read
;
59 const char * compression_name
;
67 /* Flags to mark progress of decompression. */
70 char end_of_entry_cleanup
;
73 ssize_t filename_length
;
75 int64_t uncompressed_size
;
76 int64_t compressed_size
;
78 unsigned char *uncompressed_buffer
;
79 size_t uncompressed_buffer_size
;
84 struct archive_string pathname
;
85 struct archive_string extra
;
89 #define ZIP_LENGTH_AT_END 8
91 struct zip_file_header
{
98 char compressed_size
[4];
99 char uncompressed_size
[4];
100 char filename_length
[2];
101 char extra_length
[2];
104 static const char *compression_names
[] = {
116 static int archive_read_format_zip_bid(struct archive_read
*);
117 static int archive_read_format_zip_cleanup(struct archive_read
*);
118 static int archive_read_format_zip_read_data(struct archive_read
*,
119 const void **, size_t *, off_t
*);
120 static int archive_read_format_zip_read_data_skip(struct archive_read
*a
);
121 static int archive_read_format_zip_read_header(struct archive_read
*,
122 struct archive_entry
*);
123 static int i2(const char *);
124 static int i4(const char *);
125 static unsigned int u2(const char *);
126 static unsigned int u4(const char *);
127 static uint64_t u8(const char *);
128 static int zip_read_data_deflate(struct archive_read
*a
, const void **buff
,
129 size_t *size
, off_t
*offset
);
130 static int zip_read_data_none(struct archive_read
*a
, const void **buff
,
131 size_t *size
, off_t
*offset
);
132 static int zip_read_file_header(struct archive_read
*a
,
133 struct archive_entry
*entry
, struct zip
*zip
);
134 static time_t zip_time(const char *);
135 static void process_extra(const void* extra
, struct zip
* zip
);
138 archive_read_support_format_zip(struct archive
*_a
)
140 struct archive_read
*a
= (struct archive_read
*)_a
;
144 zip
= (struct zip
*)malloc(sizeof(*zip
));
146 archive_set_error(&a
->archive
, ENOMEM
, "Can't allocate zip data");
147 return (ARCHIVE_FATAL
);
149 memset(zip
, 0, sizeof(*zip
));
151 r
= __archive_read_register_format(a
,
153 archive_read_format_zip_bid
,
154 archive_read_format_zip_read_header
,
155 archive_read_format_zip_read_data
,
156 archive_read_format_zip_read_data_skip
,
157 archive_read_format_zip_cleanup
);
166 archive_read_format_zip_bid(struct archive_read
*a
)
173 if (a
->archive
.archive_format
== ARCHIVE_FORMAT_ZIP
)
176 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, 4);
181 if (p
[0] == 'P' && p
[1] == 'K') {
183 if (p
[2] == '\001' && p
[3] == '\002')
185 else if (p
[2] == '\003' && p
[3] == '\004')
187 else if (p
[2] == '\005' && p
[3] == '\006')
189 else if (p
[2] == '\007' && p
[3] == '\010')
196 archive_read_format_zip_read_header(struct archive_read
*a
,
197 struct archive_entry
*entry
)
201 const char *signature
;
204 a
->archive
.archive_format
= ARCHIVE_FORMAT_ZIP
;
205 if (a
->archive
.archive_format_name
== NULL
)
206 a
->archive
.archive_format_name
= "ZIP";
208 zip
= (struct zip
*)(a
->format
->data
);
209 zip
->decompress_init
= 0;
210 zip
->end_of_entry
= 0;
211 zip
->end_of_entry_cleanup
= 0;
212 zip
->entry_uncompressed_bytes_read
= 0;
213 zip
->entry_compressed_bytes_read
= 0;
214 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, 4);
216 return (ARCHIVE_FATAL
);
218 signature
= (const char *)h
;
219 if (signature
[0] != 'P' || signature
[1] != 'K') {
220 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
222 return (ARCHIVE_FATAL
);
225 if (signature
[2] == '\001' && signature
[3] == '\002') {
226 /* Beginning of central directory. */
227 return (ARCHIVE_EOF
);
230 if (signature
[2] == '\003' && signature
[3] == '\004') {
231 /* Regular file entry. */
232 return (zip_read_file_header(a
, entry
, zip
));
235 if (signature
[2] == '\005' && signature
[3] == '\006') {
236 /* End-of-archive record. */
237 return (ARCHIVE_EOF
);
240 if (signature
[2] == '\007' && signature
[3] == '\010') {
242 * We should never encounter this record here;
243 * see ZIP_LENGTH_AT_END handling below for details.
245 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
246 "Bad ZIP file: Unexpected end-of-entry record");
247 return (ARCHIVE_FATAL
);
250 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
251 "Damaged ZIP file or unsupported format variant (%d,%d)",
252 signature
[2], signature
[3]);
253 return (ARCHIVE_FATAL
);
257 zip_read_file_header(struct archive_read
*a
, struct archive_entry
*entry
,
260 const struct zip_file_header
*p
;
265 (a
->decompressor
->read_ahead
)(a
, &h
, sizeof(struct zip_file_header
));
266 if (bytes_read
< (int)sizeof(struct zip_file_header
)) {
267 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
268 "Truncated ZIP file header");
269 return (ARCHIVE_FATAL
);
271 p
= (const struct zip_file_header
*)h
;
273 zip
->version
= p
->version
[0];
274 zip
->system
= p
->version
[1];
275 zip
->flags
= i2(p
->flags
);
276 zip
->compression
= i2(p
->compression
);
277 if (zip
->compression
<
278 sizeof(compression_names
)/sizeof(compression_names
[0]))
279 zip
->compression_name
= compression_names
[zip
->compression
];
281 zip
->compression_name
= "??";
282 zip
->mtime
= zip_time(p
->timedate
);
288 zip
->crc32
= i4(p
->crc32
);
289 zip
->filename_length
= i2(p
->filename_length
);
290 zip
->extra_length
= i2(p
->extra_length
);
291 zip
->uncompressed_size
= u4(p
->uncompressed_size
);
292 zip
->compressed_size
= u4(p
->compressed_size
);
294 (a
->decompressor
->consume
)(a
, sizeof(struct zip_file_header
));
297 /* Read the filename. */
298 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, zip
->filename_length
);
299 if (bytes_read
< zip
->filename_length
) {
300 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
301 "Truncated ZIP file header");
302 return (ARCHIVE_FATAL
);
304 archive_string_ensure(&zip
->pathname
, zip
->filename_length
);
305 archive_strncpy(&zip
->pathname
, (const char *)h
, zip
->filename_length
);
306 (a
->decompressor
->consume
)(a
, zip
->filename_length
);
307 archive_entry_set_pathname(entry
, zip
->pathname
.s
);
309 if (zip
->pathname
.s
[archive_strlen(&zip
->pathname
) - 1] == '/')
310 zip
->mode
= AE_IFDIR
| 0777;
312 zip
->mode
= AE_IFREG
| 0777;
314 /* Read the extra data. */
315 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, zip
->extra_length
);
316 if (bytes_read
< zip
->extra_length
) {
317 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
318 "Truncated ZIP file header");
319 return (ARCHIVE_FATAL
);
321 process_extra(h
, zip
);
322 (a
->decompressor
->consume
)(a
, zip
->extra_length
);
324 /* Populate some additional entry fields: */
325 archive_entry_set_mode(entry
, zip
->mode
);
326 archive_entry_set_uid(entry
, zip
->uid
);
327 archive_entry_set_gid(entry
, zip
->gid
);
328 archive_entry_set_mtime(entry
, zip
->mtime
, 0);
329 archive_entry_set_ctime(entry
, zip
->ctime
, 0);
330 archive_entry_set_atime(entry
, zip
->atime
, 0);
331 archive_entry_set_size(entry
, zip
->uncompressed_size
);
333 zip
->entry_bytes_remaining
= zip
->compressed_size
;
334 zip
->entry_offset
= 0;
336 /* Set up a more descriptive format name. */
337 sprintf(zip
->format_name
, "ZIP %d.%d (%s)",
338 zip
->version
/ 10, zip
->version
% 10,
339 zip
->compression_name
);
340 a
->archive
.archive_format_name
= zip
->format_name
;
345 /* Convert an MSDOS-style date/time into Unix-style time. */
/*
 * p points at four bytes: a 16-bit time word followed by a 16-bit date
 * word, each stored low byte first.  The fields are unpacked into the
 * broken-down time 'ts' below.  The final conversion of 'ts' into the
 * returned time_t is not part of the visible fragment.
 */
347 zip_time(const char *p
)
/* Assemble the two 16-bit words; the 0xff mask discards any sign extension of p[n]. */
352 msTime
= (0xff & (unsigned)p
[0]) + 256 * (0xff & (unsigned)p
[1]);
353 msDate
= (0xff & (unsigned)p
[2]) + 256 * (0xff & (unsigned)p
[3]);
/* Start from an all-zero struct tm so fields not set below are 0. */
355 memset(&ts
, 0, sizeof(ts
));
/* Date word: bits 9-15 hold the year stored relative to 1980 (hence +80), bits 5-8 a 1-based month, bits 0-4 the day. */
356 ts
.tm_year
= ((msDate
>> 9) & 0x7f) + 80; /* Years since 1900. */
357 ts
.tm_mon
= ((msDate
>> 5) & 0x0f) - 1; /* Month number. */
358 ts
.tm_mday
= msDate
& 0x1f; /* Day of month. */
/* Time word: bits 11-15 hour, bits 5-10 minute, bits 0-4 seconds divided by two. */
359 ts
.tm_hour
= (msTime
>> 11) & 0x1f;
360 ts
.tm_min
= (msTime
>> 5) & 0x3f;
/* Shifting left by one doubles the stored value; the 0x3e mask keeps the result even and at most 62. */
361 ts
.tm_sec
= (msTime
<< 1) & 0x3e;
367 archive_read_format_zip_read_data(struct archive_read
*a
,
368 const void **buff
, size_t *size
, off_t
*offset
)
373 zip
= (struct zip
*)(a
->format
->data
);
376 * If we hit end-of-entry last time, clean up and return
377 * ARCHIVE_EOF this time.
379 if (zip
->end_of_entry
) {
380 if (!zip
->end_of_entry_cleanup
) {
381 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
385 (a
->decompressor
->read_ahead
)(a
, &h
, 16);
386 if (bytes_read
< 16) {
387 archive_set_error(&a
->archive
,
388 ARCHIVE_ERRNO_FILE_FORMAT
,
389 "Truncated ZIP end-of-file record");
390 return (ARCHIVE_FATAL
);
393 zip
->crc32
= i4(p
+ 4);
394 zip
->compressed_size
= u4(p
+ 8);
395 zip
->uncompressed_size
= u4(p
+ 12);
396 bytes_read
= (a
->decompressor
->consume
)(a
, 16);
399 /* Check file size, CRC against these values. */
400 if (zip
->compressed_size
!= zip
->entry_compressed_bytes_read
) {
401 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
402 "ZIP compressed data is wrong size");
403 return (ARCHIVE_WARN
);
405 /* Size field only stores the lower 32 bits of the actual size. */
406 if ((zip
->uncompressed_size
& UINT32_MAX
)
407 != (zip
->entry_uncompressed_bytes_read
& UINT32_MAX
)) {
408 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
409 "ZIP uncompressed data is wrong size");
410 return (ARCHIVE_WARN
);
412 /* TODO: Compute CRC. */
414 if (zip->crc32 != zip->entry_crc32_calculated) {
415 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
416 "ZIP data CRC error");
417 return (ARCHIVE_WARN);
420 /* End-of-entry cleanup done. */
421 zip
->end_of_entry_cleanup
= 1;
423 return (ARCHIVE_EOF
);
426 switch(zip
->compression
) {
427 case 0: /* No compression. */
428 r
= zip_read_data_none(a
, buff
, size
, offset
);
430 case 8: /* Deflate compression. */
431 r
= zip_read_data_deflate(a
, buff
, size
, offset
);
433 default: /* Unsupported compression. */
437 /* Return a warning. */
438 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
439 "Unsupported ZIP compression method (%s)",
440 zip
->compression_name
);
441 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
443 * ZIP_LENGTH_AT_END requires us to
444 * decompress the entry in order to
445 * skip it, but we don't know this
446 * compression method, so we give up.
450 /* We know compressed size; just skip it. */
451 archive_read_format_zip_read_data_skip(a
);
460 * Read "uncompressed" data. According to the current specification,
461 * if ZIP_LENGTH_AT_END is specified, then the size fields in the
462 * initial file header are supposed to be set to zero. This would, of
463 * course, make it impossible for us to read the archive, since we
464 * couldn't determine the end of the file data. Info-ZIP seems to
465 * include the real size fields both before and after the data in this
466 * case (the CRC only appears afterwards), so this works as you would
469 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
470 * zip->end_of_entry if it consumes all of the data.
473 zip_read_data_none(struct archive_read
*a
, const void **buff
,
474 size_t *size
, off_t
*offset
)
479 zip
= (struct zip
*)(a
->format
->data
);
481 if (zip
->entry_bytes_remaining
== 0) {
484 *offset
= zip
->entry_offset
;
485 zip
->end_of_entry
= 1;
489 * Note: '1' here is a performance optimization.
490 * Recall that the decompression layer returns a count of
491 * available bytes; asking for more than that forces the
492 * decompressor to combine reads by copying data.
494 bytes_avail
= (a
->decompressor
->read_ahead
)(a
, buff
, 1);
495 if (bytes_avail
<= 0) {
496 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
497 "Truncated ZIP file data");
498 return (ARCHIVE_FATAL
);
500 if (bytes_avail
> zip
->entry_bytes_remaining
)
501 bytes_avail
= zip
->entry_bytes_remaining
;
502 (a
->decompressor
->consume
)(a
, bytes_avail
);
504 *offset
= zip
->entry_offset
;
505 zip
->entry_offset
+= *size
;
506 zip
->entry_bytes_remaining
-= *size
;
507 zip
->entry_uncompressed_bytes_read
+= *size
;
508 zip
->entry_compressed_bytes_read
+= *size
;
514 zip_read_data_deflate(struct archive_read
*a
, const void **buff
,
515 size_t *size
, off_t
*offset
)
519 const void *compressed_buff
;
522 zip
= (struct zip
*)(a
->format
->data
);
524 /* If the buffer hasn't been allocated, allocate it now. */
525 if (zip
->uncompressed_buffer
== NULL
) {
526 zip
->uncompressed_buffer_size
= 32 * 1024;
527 zip
->uncompressed_buffer
528 = (unsigned char *)malloc(zip
->uncompressed_buffer_size
);
529 if (zip
->uncompressed_buffer
== NULL
) {
530 archive_set_error(&a
->archive
, ENOMEM
,
531 "No memory for ZIP decompression");
532 return (ARCHIVE_FATAL
);
536 /* If we haven't yet read any data, initialize the decompressor. */
537 if (!zip
->decompress_init
) {
538 r
= inflateInit2(&zip
->stream
,
539 -15 /* Don't check for zlib header */);
541 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
542 "Can't initialize ZIP decompression.");
543 return (ARCHIVE_FATAL
);
545 zip
->decompress_init
= 1;
549 * Note: '1' here is a performance optimization.
550 * Recall that the decompression layer returns a count of
551 * available bytes; asking for more than that forces the
552 * decompressor to combine reads by copying data.
554 bytes_avail
= (a
->decompressor
->read_ahead
)(a
, &compressed_buff
, 1);
555 if (bytes_avail
<= 0) {
556 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_FILE_FORMAT
,
557 "Truncated ZIP file body");
558 return (ARCHIVE_FATAL
);
562 * A bug in zlib.h: stream.next_in should be marked 'const'
563 * but isn't (the library never alters data through the
564 * next_in pointer, only reads it). The result: this ugly
565 * cast to remove 'const'.
567 zip
->stream
.next_in
= (Bytef
*)(uintptr_t)(const void *)compressed_buff
;
568 zip
->stream
.avail_in
= bytes_avail
;
569 zip
->stream
.total_in
= 0;
570 zip
->stream
.next_out
= zip
->uncompressed_buffer
;
571 zip
->stream
.avail_out
= zip
->uncompressed_buffer_size
;
572 zip
->stream
.total_out
= 0;
574 r
= inflate(&zip
->stream
, 0);
579 zip
->end_of_entry
= 1;
582 archive_set_error(&a
->archive
, ENOMEM
,
583 "Out of memory for ZIP decompression");
584 return (ARCHIVE_FATAL
);
586 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
587 "ZIP decompression failed (%d)", r
);
588 return (ARCHIVE_FATAL
);
591 /* Consume as much as the compressor actually used. */
592 bytes_avail
= zip
->stream
.total_in
;
593 (a
->decompressor
->consume
)(a
, bytes_avail
);
594 zip
->entry_bytes_remaining
-= bytes_avail
;
595 zip
->entry_compressed_bytes_read
+= bytes_avail
;
597 *offset
= zip
->entry_offset
;
598 *size
= zip
->stream
.total_out
;
599 zip
->entry_uncompressed_bytes_read
+= *size
;
600 *buff
= zip
->uncompressed_buffer
;
601 zip
->entry_offset
+= *size
;
606 zip_read_data_deflate(struct archive_read
*a
, const void **buff
,
607 size_t *size
, off_t
*offset
)
612 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
613 "libarchive compiled without deflate support (no libz)");
614 return (ARCHIVE_FATAL
);
619 archive_read_format_zip_read_data_skip(struct archive_read
*a
)
622 const void *buff
= NULL
;
625 zip
= (struct zip
*)(a
->format
->data
);
628 * If the length is at the end, we have no choice but
629 * to decompress all the data to find the end marker.
631 if (zip
->flags
& ZIP_LENGTH_AT_END
) {
636 r
= archive_read_format_zip_read_data(a
, &buff
,
638 } while (r
== ARCHIVE_OK
);
643 * If the length is at the beginning, we can skip the
644 * compressed data much more quickly.
646 while (zip
->entry_bytes_remaining
> 0) {
647 bytes_avail
= (a
->decompressor
->read_ahead
)(a
, &buff
, 1);
648 if (bytes_avail
<= 0) {
649 archive_set_error(&a
->archive
,
650 ARCHIVE_ERRNO_FILE_FORMAT
,
651 "Truncated ZIP file body");
652 return (ARCHIVE_FATAL
);
654 if (bytes_avail
> zip
->entry_bytes_remaining
)
655 bytes_avail
= zip
->entry_bytes_remaining
;
656 (a
->decompressor
->consume
)(a
, bytes_avail
);
657 zip
->entry_bytes_remaining
-= bytes_avail
;
659 /* This entry is finished and done. */
660 zip
->end_of_entry_cleanup
= zip
->end_of_entry
= 1;
/*
 * Release the resources held in the ZIP reader state attached to
 * a->format->data: the uncompressed-data buffer and the pathname and
 * extra strings.  The data pointer is cleared afterwards so it is not
 * left pointing at released state.
 */
665 archive_read_format_zip_cleanup(struct archive_read
*a
)
/* Recover the ZIP state stored as this format's private data. */
669 zip
= (struct zip
*)(a
->format
->data
);
/* The buffer is only released when it is non-NULL. */
670 if (zip
->uncompressed_buffer
!= NULL
)
671 free(zip
->uncompressed_buffer
);
/* Release the storage owned by the two archive_string members. */
672 archive_string_free(&(zip
->pathname
));
673 archive_string_free(&(zip
->extra
));
/* Detach the state from the format slot. */
675 (a
->format
->data
) = NULL
;
/*
 * Little-endian field decoders.  ZIP stores every multi-byte header
 * field least-significant byte first; these helpers read such a field
 * from an arbitrary (possibly unaligned) char pointer.
 */

/* Decode 2 bytes at p; the result is always in the range 0..65535. */
static int
i2(const char *p)
{
	return ((0xff & (int)p[0]) + 256 * (0xff & (int)p[1]));
}

/*
 * Decode 4 bytes at p as a signed 32-bit value.
 *
 * The value is assembled in uint32_t: multiplying the high half by
 * 0x10000 in signed int overflows (undefined behavior) whenever the top
 * bit of the field is set.  The final conversion to int is spelled out
 * so that it is fully defined as well.
 */
static int
i4(const char *p)
{
	uint32_t v = (uint32_t)(0xffff & i2(p)) |
	    ((uint32_t)(0xffff & i2(p + 2)) << 16);

	if (v & 0x80000000u)
		return (-(int)(uint32_t)~v - 1);
	return ((int)v);
}

/* Decode 2 bytes at p as an unsigned value in the range 0..65535. */
static unsigned int
u2(const char *p)
{
	return ((0xff & (unsigned int)p[0]) + 256 * (0xff & (unsigned int)p[1]));
}

/* Decode 4 bytes at p as an unsigned 32-bit value. */
static unsigned int
u4(const char *p)
{
	return u2(p) + 0x10000 * u2(p + 2);
}

/*
 * Decode 8 bytes at p as an unsigned 64-bit value.
 *
 * Both halves are widened to uint64_t before combining; scaling the
 * high half in signed long long overflows when its top bit is set.
 */
static uint64_t
u8(const char *p)
{
	return ((uint64_t)u4(p) + ((uint64_t)u4(p + 4) << 32));
}
711 * The extra data is stored as a list of
712 * id1+size1+data1 + id2+size2+data2 ...
713 * triplets. id and size are 2 bytes each.
716 process_extra(const void* extra
, struct zip
* zip
)
719 const char *p
= (const char *)extra
;
720 while (offset
< zip
->extra_length
- 4)
722 unsigned short headerid
= u2(p
+ offset
);
723 unsigned short datasize
= u2(p
+ offset
+ 2);
725 if (offset
+ datasize
> zip
->extra_length
)
728 fprintf(stderr
, "Header id 0x%04x, length %d\n",
733 /* Zip64 extended information extra field. */
735 zip
->uncompressed_size
= u8(p
+ offset
);
737 zip
->compressed_size
= u8(p
+ offset
+ 8);
741 /* Extended time field "UT". */
742 int flags
= p
[offset
];
745 /* Flag bits indicate which dates are present. */
749 fprintf(stderr
, "mtime: %d -> %d\n",
750 zip
->mtime
, i4(p
+ offset
));
754 zip
->mtime
= i4(p
+ offset
);
762 zip
->atime
= i4(p
+ offset
);
770 zip
->ctime
= i4(p
+ offset
);
777 /* Info-ZIP Unix Extra Field (type 2) "Ux". */
779 fprintf(stderr
, "uid %d gid %d\n",
780 i2(p
+ offset
), i2(p
+ offset
+ 2));
783 zip
->uid
= i2(p
+ offset
);
785 zip
->gid
= i2(p
+ offset
+ 2);
793 if (offset
!= zip
->extra_length
)
796 "Extra data field contents do not match reported size!");