3 * NOTE: this is part of libzzipfseeko (i.e. it is not libzzip).
6 * These routines are fully independent from the traditional zzip
7 * implementation. They assume a readonly seekable stdio handle
8 * representing a complete zip file. The functions show how to
9 * parse the structure, find files and return a decoded bytestream.
11 * These routines are a bit simple and really here for documenting
12 * the way to access a zip file. The complexity of zip access comes
13 * from staggered reading of bytes and reposition of a filepointer in
14 * a big archive with lots of files and long compressed datastreams.
15 * Plus variants of drop-in stdio replacements, obfuscation routines,
16 * auto fileextensions, drop-in dirent replacements, and so on...
18 * btw, we can _not_ use fgetpos/fsetpos since an fpos_t has no asserted
19 * relation to a linear seek value as specified in zip info headers. In
20 * general it is not a problem if your system has no fseeko/ftello pair
21 * since we can fall back to fseek/ftell which limits the zip disk size
22 * to 2GiB but the zip-storable seek values are 32bit limited anyway.
25 * Guido Draheim <guidod@gmx.de>
27 * Copyright (c) 2003,2004 Guido Draheim
28 * All rights reserved,
29 * use under the restrictions of the
30 * Lesser GNU General Public License
31 * or alternatively the restrictions
32 * of the Mozilla Public License 1.1
35 #define _LARGEFILE_SOURCE 1
36 #define _ZZIP_ENTRY_STRUCT 1
38 #include <zzip/fseeko.h>
40 #include <zzip/fetch.h>
41 #include <zzip/__mmap.h>
42 #include <zzip/__fnmatch.h>
48 #if defined ZZIP_HAVE_STRING_H
50 #elif defined ZZIP_HAVE_STRINGS_H
54 #if defined ZZIP_HAVE_STDINT_H
58 #if __STDC_VERSION__+0 > 199900L
66 #ifndef ZZIP_HAVE_FSEEKO
71 /* note that the struct zzip_entry inherits the zzip_disk_entry values
72 * and usually carries a copy of its values (in disk format!). To make the
73 * following code more readable, we use a shorthand notation for the
74 * upcast needed in C (not needed in C++) as "disk_(entry)".
76 #ifdef __zzip_entry_extends_zzip_disk_entry
77 #define disk_(_entry_) _entry_
79 #define disk_(_entry_) (& (_entry_)->head)
82 /* we try to round all seeks to the pagesize - since we do not use
83 * the sys/mmap interface we have to guess a good value here: */
86 /* ====================================================================== */
88 /* helper functions */
90 /** => zzip_entry_data_offset
91 * This function reads the corresponding struct zzip_file_header from
92 * the zip disk of the given "entry". The returned off_t points to the
93 * end of the file_header where the current fseek pointer has stopped.
94 * This is used to immediately parse out any filename/extras block following
95 * the file_header. The return value is zero on error.
98 zzip_entry_fread_file_header(ZZIP_ENTRY
* entry
,
99 struct zzip_file_header
*file_header
)
101 if (! entry
|| ! file_header
)
103 ___ zzip_off_t offset
= zzip_disk_entry_fileoffset(disk_(entry
));
104 if (0 > offset
|| offset
>= entry
->disksize
)
107 if (fseeko(entry
->diskfile
, offset
, SEEK_SET
) == -1) return 0;
108 return (fread(file_header
, sizeof(*file_header
), 1, entry
->diskfile
)
109 ? offset
+ sizeof(*file_header
) : 0);
113 /** helper functions for (fseeko) zip access api
115 * This function returns the seekval offset of the data portion of the
116 * file referenced by the given zzip_entry. It requires an intermediate
117 * check of the file_header structure (i.e. it reads it from disk). After
118 * this call, the contained diskfile readposition is already set to the
119 * data_offset returned here. On error -1 is returned.
122 zzip_entry_data_offset(ZZIP_ENTRY
* entry
)
124 struct zzip_file_header file_header
;
127 ___ zzip_off_t offset
= zzip_entry_fread_file_header(entry
, &file_header
);
130 offset
+= zzip_file_header_sizeof_tails(&file_header
);
131 if (fseeko(entry
->diskfile
, offset
, SEEK_SET
) == -1)
137 /** => zzip_entry_data_offset
138 * This function is a big helper despite its little name: in a zip file the
139 * encoded filenames are usually NOT zero-terminated but for common usage
140 * with libc we need it that way. Secondly, the filename SHOULD be present
141 * in the zip central directory but if not then we fall back to the filename
142 * given in the file_header of each compressed data portion.
145 zzip_entry_strdup_name(ZZIP_ENTRY
* entry
)
151 if ((len
= zzip_disk_entry_namlen(disk_(entry
))))
153 char *name
= malloc(len
+ 1);
156 memcpy(name
, entry
->tail
, len
);
160 ___
auto struct zzip_file_header header
;
161 if (zzip_entry_fread_file_header(entry
, &header
)
162 && (len
= zzip_file_header_namlen(&header
)))
164 char *name
= malloc(len
+ 1);
168 zzip_size_t n
= fread(name
, 1, len
, entry
->diskfile
);
183 prescan_entry(ZZIP_ENTRY
* entry
)
186 ___ zzip_off_t tailsize
= zzip_disk_entry_sizeof_tails(disk_(entry
));
187 if (tailsize
+ 1 > entry
->tailalloc
)
189 char *newtail
= realloc(entry
->tail
, tailsize
+ 1);
192 entry
->tail
= newtail
;
193 entry
->tailalloc
= tailsize
+ 1;
195 # ifdef SIZE_MAX /* from stdint.h */
196 if (tailsize
> (zzip_off_t
)(SIZE_MAX
)) { return EFBIG
; }
198 ___ zzip_size_t readsize
= fread(entry
->tail
, 1, tailsize
, entry
->diskfile
);
199 /* name + comment + extras */
200 if ((zzip_off_t
)readsize
!= tailsize
) {
208 prescan_clear(ZZIP_ENTRY
* entry
)
214 entry
->tailalloc
= 0;
217 /* ====================================================================== */
219 /** => zzip_entry_findfile
221 * This function is the first call of all the zip access functions here.
222 * It contains the code to find the first entry of the zip central directory.
223 * Here we require the stdio handle to represent a real zip file where the
224 * disk_trailer is _last_ in the file area, so that its position would be at
225 * a fixed offset from the end of the file area if not for the comment field
226 * allowed to be of variable length (which needs us to do a little search
227 * for the disk_trailer). However, in this simple implementation we disregard
228 * any disk_trailer info telling about multidisk archives, so we just return
229 * a pointer to the first entry in the zip central directory of that file.
231 * For an actual means, we are going to search backwards from the end
232 * of the mmaped block looking for the PK-magic signature of a
233 * disk_trailer. If we see one then we check the rootseek value to
234 * find the first disk_entry of the root central directory. If we find
235 * the correct PK-magic signature of a disk_entry over there then we
236 * assume we are done and we are going to return a pointer to that label.
238 * The return value is a pointer to the first zzip_disk_entry being checked
239 * to be within the bounds of the file area specified by the arguments. If
240 * no disk_trailer was found then null is returned, and likewise we only
241 * accept a disk_trailer with a seekvalue that points to a disk_entry and
242 * both parts have valid PK-magic parts. Beyond some sanity check we try to
243 * catch a common brokenness with zip archives that still allows us to find
244 * the start of the zip central directory.
246 zzip__new__ ZZIP_ENTRY
*
247 zzip_entry_findfirst(FILE * disk
)
251 if (fseeko(disk
, 0, SEEK_END
) == -1)
253 ___ zzip_off_t disksize
= ftello(disk
);
254 if (disksize
< (zzip_off_t
) sizeof(struct zzip_disk_trailer
))
256 /* we read out chunks of 8 KiB in the hope to match disk granularity */
257 ___ zzip_off_t pagesize
= PAGESIZE
; /* getpagesize() */
258 ___ ZZIP_ENTRY
*entry
= malloc(sizeof(*entry
));
261 ___
unsigned char *buffer
= malloc(pagesize
);
265 assert(pagesize
/ 2 > (zzip_off_t
) sizeof(struct zzip_disk_trailer
));
266 /* at each step, we will fread a pagesize block which overlaps with the
267 * previous read by means of pagesize/2 step at the end of the while(1) */
268 ___ zzip_off_t mapoffs
= disksize
& ~(pagesize
- 1);
269 ___ zzip_off_t mapsize
= disksize
- mapoffs
;
270 if (mapoffs
&& mapsize
< pagesize
/ 2)
272 mapoffs
-= pagesize
/ 2;
273 mapsize
+= pagesize
/ 2;
275 assert(mapsize
< 3*8192);
278 if (fseeko(disk
, mapoffs
, SEEK_SET
) == -1)
280 if (fread(buffer
, 1, mapsize
, disk
) != (zzip_size_t
)mapsize
)
282 ___
unsigned char *p
=
283 buffer
+ mapsize
- sizeof(struct zzip_disk_trailer
);
284 for (; p
>= buffer
; p
--)
286 zzip_off_t root
; /* (struct zzip_disk_entry*) */
287 if (zzip_disk_trailer_check_magic(p
))
289 root
= zzip_disk_trailer_rootseek((struct zzip_disk_trailer
*)
291 if (root
> disksize
- (long) sizeof(struct zzip_disk_trailer
))
293 /* first disk_entry is after the disk_trailer? can't be! */
294 struct zzip_disk_trailer
*trailer
=
295 (struct zzip_disk_trailer
*) p
;
296 zzip_off_t rootsize
= zzip_disk_trailer_rootsize(trailer
);
297 if (rootsize
> mapoffs
)
299 /* a common brokeness that can be fixed: we just assume the
300 * central directory was written directly before : */
301 root
= mapoffs
- rootsize
;
303 } else if (zzip_disk64_trailer_check_magic(p
))
305 struct zzip_disk64_trailer
*trailer
=
306 (struct zzip_disk64_trailer
*) p
;
307 if (sizeof(zzip_off_t
) < 8)
309 root
= zzip_disk64_trailer_rootseek(trailer
);
313 assert(0 <= root
&& root
< mapsize
);
314 if (fseeko(disk
, root
, SEEK_SET
) == -1)
316 if (fread(disk_(entry
), 1, sizeof(*disk_(entry
)), disk
)
317 != sizeof(*disk_(entry
))) goto error
;
318 if (zzip_disk_entry_check_magic(entry
))
321 entry
->headseek
= root
;
322 entry
->diskfile
= disk
;
323 entry
->disksize
= disksize
;
324 if (prescan_entry(entry
))
332 assert(mapsize
>= pagesize
/ 2);
333 mapoffs
-= pagesize
/ 2; /* mapsize += pagesize/2; */
334 mapsize
= pagesize
; /* if (mapsize > pagesize) ... */
335 if (disksize
- mapoffs
> 64 * 1024)
351 /** => zzip_entry_findfile
353 * This function takes an existing "entry" in the central root directory
354 * (e.g. from zzip_entry_findfirst) and moves it to point to the next entry.
355 * On error it returns 0, otherwise the old entry. If no further match is
356 * found then null is returned and the entry already free()d. If you want
357 * to stop searching for matches before that case then please call
358 * => zzip_entry_free on the cursor struct ZZIP_ENTRY.
360 zzip__new__ ZZIP_ENTRY
*
361 zzip_entry_findnext(ZZIP_ENTRY
* _zzip_restrict entry
)
365 if (! zzip_disk_entry_check_magic(entry
))
367 ___ zzip_off_t seek
=
368 entry
->headseek
+ zzip_disk_entry_sizeto_end(disk_(entry
));
369 if (seek
+ (zzip_off_t
) sizeof(*disk_(entry
)) > entry
->disksize
)
372 if (fseeko(entry
->diskfile
, seek
, SEEK_SET
) == -1)
374 if (fread(disk_(entry
), 1, sizeof(*disk_(entry
)), entry
->diskfile
)
375 != sizeof(*disk_(entry
))) goto err
;
376 entry
->headseek
= seek
;
377 if (! zzip_disk_entry_check_magic(entry
))
379 if (prescan_entry(entry
))
383 zzip_entry_free(entry
);
388 /** => zzip_entry_findfile
389 * this function releases the malloc()ed areas needed for zzip_entry, the
390 * pointer is invalid afterwards. This function has #define synonyms of
391 * zzip_entry_findlast(), zzip_entry_findlastfile(), zzip_entry_findlastmatch()
394 zzip_entry_free(ZZIP_ENTRY
* entry
)
398 prescan_clear(entry
);
403 /** search for files in the (fseeko) zip central directory
405 * This function is given a filename as an additional argument, to find the
406 * disk_entry matching a given filename. The compare-function is usually
407 * strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used.
408 * - use null as argument for "old"-entry when searching the first
409 * matching entry, otherwise the last returned value if you look for other
410 * entries with a special "compare" function (if null then a doubled search
411 * is rather useless with this variant of _findfile). If no further entry is
412 * found then null is returned and any "old"-entry gets already free()d.
414 zzip__new__ ZZIP_ENTRY
*
415 zzip_entry_findfile(FILE * disk
, char *filename
,
416 ZZIP_ENTRY
* _zzip_restrict entry
, zzip_strcmp_fn_t compare
)
418 if (! filename
|| ! disk
)
421 entry
= zzip_entry_findfirst(disk
);
423 entry
= zzip_entry_findnext(entry
);
426 compare
= (zzip_strcmp_fn_t
) (strcmp
);
428 for (; entry
; entry
= zzip_entry_findnext(entry
))
430 /* filenames within zip files are often not null-terminated! */
431 char *realname
= zzip_entry_strdup_name(entry
);
434 if (! compare(filename
, realname
))
447 /** => zzip_entry_findfile
449 * This function uses a compare-function with an additional argument
450 * and it is called just like fnmatch(3) (from POSIX.2 AD:1993), i.e.
451 * the argument filespec first and the ziplocal filename second with
452 * the integer-flags put in as third to the indirect call. If the
453 * platform has fnmatch available then null-compare will use that one
454 * and otherwise we fall back to mere strcmp, so if you need fnmatch
455 * searching then please provide an implementation somewhere else.
456 * - use null as argument for "after"-entry when searching the first
457 * matching entry, or the last disk_entry return-value to find the
458 * next entry matching the given filespec. If no further entry is
459 * found then null is returned and any "old"-entry gets already free()d.
461 zzip__new__ ZZIP_ENTRY
*
462 zzip_entry_findmatch(FILE * disk
, char *filespec
,
463 ZZIP_ENTRY
* _zzip_restrict entry
,
464 zzip_fnmatch_fn_t compare
, int flags
)
466 if (! filespec
|| ! disk
)
469 entry
= zzip_entry_findfirst(disk
);
471 entry
= zzip_entry_findnext(entry
);
474 compare
= (zzip_fnmatch_fn_t
) _zzip_fnmatch
;
476 for (; entry
; entry
= zzip_entry_findnext(entry
))
478 /* filenames within zip files are often not null-terminated! */
479 char *realname
= zzip_entry_strdup_name(entry
);
482 if (! compare(filespec
, realname
, flags
))
495 /* ====================================================================== */
498 * typedef struct zzip_entry_file ZZIP_ENTRY_FILE;
500 struct zzip_entry_file
/* : zzip_file_header */
502 struct zzip_file_header header
; /* fopen detected header */
503 ZZIP_ENTRY
*entry
; /* fopen entry */
504 zzip_off_t data
; /* for stored blocks */
505 zzip_size_t avail
; /* memorized for checks on EOF */
506 zzip_size_t compressed
; /* compressed flag and datasize */
507 zzip_size_t dataoff
; /* offset from data start */
508 z_stream zlib
; /* for inflated blocks */
509 unsigned char buffer
[PAGESIZE
]; /* work buffer for inflate algorithm */
512 /** open a file within a zip disk for reading
514 * This function does take an "entry" argument and copies it (or just takes
515 * it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That
516 * structure contains also a zlib buffer for decoding. This function does
517 * seek to the file_header of the given "entry" and validates it for the
518 * data buffer following it. We do also prefetch some data from the data
519 * buffer thereby trying to match the disk pagesize for faster access later.
520 * The => zzip_entry_fread will then read in chunks of pagesizes which is
521 * the size of the internal readahead buffer. If an error occurs then null is returned.
524 zzip__new__ ZZIP_ENTRY_FILE
*
525 zzip_entry_fopen(ZZIP_ENTRY
* entry
, int takeover
)
531 ZZIP_ENTRY
*found
= malloc(sizeof(*entry
));
534 memcpy(found
, entry
, sizeof(*entry
)); /* prescan_copy */
535 found
->tail
= malloc(found
->tailalloc
);
537 { free (found
); return 0; }
538 memcpy(found
->tail
, entry
->tail
, entry
->tailalloc
);
541 ___ ZZIP_ENTRY_FILE
*file
= malloc(sizeof(*file
));
545 if (! zzip_entry_fread_file_header(entry
, &file
->header
))
547 file
->avail
= zzip_file_header_usize(&file
->header
);
548 file
->data
= zzip_entry_data_offset(entry
);
551 if (! file
->avail
|| zzip_file_header_data_stored(&file
->header
))
552 { file
->compressed
= 0; return file
; }
554 file
->compressed
= zzip_file_header_csize(&file
->header
);
555 file
->zlib
.opaque
= 0;
556 file
->zlib
.zalloc
= Z_NULL
;
557 file
->zlib
.zfree
= Z_NULL
;
559 ___ zzip_off_t seek
= file
->data
;
560 seek
+= sizeof(file
->buffer
);
561 seek
-= seek
& (sizeof(file
->buffer
) - 1);
562 assert(file
->data
< seek
); /* pre-read to next PAGESIZE boundary... */
563 if (fseeko(file
->entry
->diskfile
, file
->data
+ file
->dataoff
, SEEK_SET
) == -1)
565 file
->zlib
.next_in
= file
->buffer
;
566 file
->zlib
.avail_in
= fread(file
->buffer
, 1, seek
- file
->data
,
567 file
->entry
->diskfile
);
568 file
->dataoff
+= file
->zlib
.avail_in
;
571 if (! zzip_file_header_data_deflated(&file
->header
)
572 || inflateInit2(&file
->zlib
, -MAX_WBITS
) != Z_OK
)
579 zzip_entry_free(entry
);
584 /** => zzip_entry_fopen
586 * This function opens a file found by name, so it does a search into
587 * the zip central directory with => zzip_entry_findfile and whatever
588 * is found first is given to => zzip_entry_fopen
590 zzip__new__ ZZIP_ENTRY_FILE
*
591 zzip_entry_ffile(FILE * disk
, char *filename
)
593 ZZIP_ENTRY
*entry
= zzip_entry_findfile(disk
, filename
, 0, 0);
596 return zzip_entry_fopen(entry
, 1);
600 /** => zzip_entry_fopen
602 * This function reads more bytes into the output buffer specified as
603 * arguments. The return value is null on eof or error, the stdio-like
604 * interface can not distinguish between these so you need to check
605 * with => zzip_entry_feof for the difference.
608 zzip_entry_fread(void *ptr
, zzip_size_t sized
, zzip_size_t nmemb
,
609 ZZIP_ENTRY_FILE
* file
)
613 ___ zzip_size_t size
= sized
* nmemb
;
614 if (! file
->compressed
)
616 if (size
> file
->avail
)
618 if (fread(ptr
, 1, size
, file
->entry
->diskfile
) != size
) return 0;
619 file
->dataoff
+= size
;
624 file
->zlib
.avail_out
= size
;
625 file
->zlib
.next_out
= ptr
;
626 ___ zzip_size_t total_old
= file
->zlib
.total_out
;
629 if (! file
->zlib
.avail_in
)
631 size
= file
->compressed
- file
->dataoff
;
632 if (size
> sizeof(file
->buffer
))
633 size
= sizeof(file
->buffer
);
634 /* fseek (file->data + file->dataoff, file->entry->diskfile); */
635 file
->zlib
.avail_in
= fread(file
->buffer
, 1, size
,
636 file
->entry
->diskfile
);
637 file
->zlib
.next_in
= file
->buffer
;
638 file
->dataoff
+= file
->zlib
.avail_in
;
640 if (! file
->zlib
.avail_in
)
643 ___
int err
= inflate(&file
->zlib
, Z_NO_FLUSH
);
644 if (err
== Z_STREAM_END
)
646 else if (err
== Z_OK
)
647 file
->avail
-= file
->zlib
.total_out
- total_old
;
651 if (file
->zlib
.avail_out
&& ! file
->zlib
.avail_in
)
653 return file
->zlib
.total_out
- total_old
;
659 /** => zzip_entry_fopen
660 * This function releases any zlib decoder info needed for decompression
661 * and dumps the ZZIP_ENTRY_FILE struct then.
664 zzip_entry_fclose(ZZIP_ENTRY_FILE
* file
)
668 if (file
->compressed
)
669 inflateEnd(&file
->zlib
);
670 zzip_entry_free(file
->entry
);
675 /** => zzip_entry_fopen
677 * This function allows one to distinguish an error from an eof condition.
678 * Actually, if we found an error but we did already reach eof then we
679 * just keep on saying that it was an eof, so the app can just continue.
682 zzip_entry_feof(ZZIP_ENTRY_FILE
* file
)
684 return ! file
|| ! file
->avail
;