beta-0.89.2
[luatex.git] / source / libs / zziplib / zziplib-0.13.62 / zzip / fseeko.c
blobd99067a0b4ff61af17a5404baf44d5581e7f46bf
/*
 * NOTE: this is part of libzzipfseeko (i.e. it is not libzzip).
 *                                            ==================
 *
 * These routines are fully independent from the traditional zzip
 * implementation. They assume a readonly seekable stdio handle
 * representing a complete zip file. The functions show how to
 * parse the structure, find files and return a decoded bytestream.
 *
 * These routines are a bit simple and really here for documenting
 * the way to access a zip file. The complexity of zip access comes
 * from staggered reading of bytes and reposition of a filepointer in
 * a big archive with lots of files and long compressed datastreams.
 * Plus variants of drop-in stdio replacements, obfuscation routines,
 * auto fileextensions, drop-in dirent replacements, and so on...
 *
 * btw, we can _not_ use fgetpos/fsetpos since an fpos_t has no asserted
 * relation to a linear seek value as specified in zip info headers. In
 * general it is not a problem if your system has no fseeko/ftello pair
 * since we can fall back to fseek/ftell which limits the zip disk size
 * to 2GiB but the zip-storable seek values are 32bit limited anyway.
 *
 * Author:
 *      Guido Draheim <guidod@gmx.de>
 *
 * Copyright (c) 2003,2004 Guido Draheim
 *          All rights reserved,
 *          use under the restrictions of the
 *          Lesser GNU General Public License
 *          or alternatively the restrictions
 *          of the Mozilla Public License 1.1
 */
#define _LARGEFILE_SOURCE 1
#define _ZZIP_ENTRY_STRUCT 1

#include <zzip/fseeko.h>

#include <zzip/fetch.h>
#include <zzip/__mmap.h>
#include <zzip/__fnmatch.h>

#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <sys/stat.h>

#if defined ZZIP_HAVE_STRING_H
#include <string.h>
#elif defined ZZIP_HAVE_STRINGS_H
#include <strings.h>
#endif

#if defined ZZIP_HAVE_STDINT_H
#include <stdint.h>
#endif
/* Scope markers: a C99 (or later) compiler accepts declarations after
 * statements, so both markers expand to nothing. Older compilers get a
 * nested block instead: "___" opens it and "____" closes it. */
#if __STDC_VERSION__+0 > 199900L
#  define ___
#  define ____
#else
#  define ___ {
#  define ____ }
#endif

/* No fseeko/ftello pair configured: use plain fseek/ftell in their place. */
#if !defined ZZIP_HAVE_FSEEKO
#  define fseeko fseek
#  define ftello ftell
#endif

/* note that the struct zzip_entry inherits the zzip_disk_entry values
 * and usually carries a copy of its values (in disk format!). To make the
 * following code more readable, we use a shorthand notation for the
 * upcast needed in C (not needed in C++) as "disk_(entry)".
 */
#if defined __zzip_entry_extends_zzip_disk_entry
#  define disk_(_entry_) _entry_
#else
#  define disk_(_entry_) (& (_entry_)->head)
#endif

/* we try to round all seeks to the pagesize - since we do not use
 * the sys/mmap interface we have to guess a good value here: */
#define PAGESIZE 8192
/* ====================================================================== */

/*                      helper functions                                  */
90 /** => zzip_entry_data_offset
91 * This functions read the correspoding struct zzip_file_header from
92 * the zip disk of the given "entry". The returned off_t points to the
93 * end of the file_header where the current fseek pointer has stopped.
94 * This is used to immediatly parse out any filename/extras block following
95 * the file_header. The return value is null on error.
97 static zzip_off_t
98 zzip_entry_fread_file_header(ZZIP_ENTRY * entry,
99 struct zzip_file_header *file_header)
101 if (! entry || ! file_header)
102 return 0;
103 ___ zzip_off_t offset = zzip_disk_entry_fileoffset(disk_(entry));
104 if (0 > offset || offset >= entry->disksize)
105 return 0;
107 if (fseeko(entry->diskfile, offset, SEEK_SET) == -1) return 0;
108 return (fread(file_header, sizeof(*file_header), 1, entry->diskfile)
109 ? offset + sizeof(*file_header) : 0);
110 ____;
113 /** helper functions for (fseeko) zip access api
115 * This functions returns the seekval offset of the data portion of the
116 * file referenced by the given zzip_entry. It requires an intermediate
117 * check of the file_header structure (i.e. it reads it from disk). After
118 * this call, the contained diskfile readposition is already set to the
119 * data_offset returned here. On error -1 is returned.
121 zzip_off_t
122 zzip_entry_data_offset(ZZIP_ENTRY * entry)
124 struct zzip_file_header file_header;
125 if (! entry)
126 return -1;
127 ___ zzip_off_t offset = zzip_entry_fread_file_header(entry, &file_header);
128 if (! offset)
129 return -1;
130 offset += zzip_file_header_sizeof_tails(&file_header);
131 if (fseeko(entry->diskfile, offset, SEEK_SET) == -1)
132 return -1;
133 return offset;
134 ____;
137 /** => zzip_entry_data_offset
138 * This function is a big helper despite its little name: in a zip file the
139 * encoded filenames are usually NOT zero-terminated but for common usage
140 * with libc we need it that way. Secondly, the filename SHOULD be present
141 * in the zip central directory but if not then we fallback to the filename
142 * given in the file_header of each compressed data portion.
144 zzip__new__ char *
145 zzip_entry_strdup_name(ZZIP_ENTRY * entry)
147 if (! entry)
148 return 0;
150 ___ zzip_size_t len;
151 if ((len = zzip_disk_entry_namlen(disk_(entry))))
153 char *name = malloc(len + 1);
154 if (! name)
155 return 0;
156 memcpy(name, entry->tail, len);
157 name[len] = '\0';
158 return name;
160 ___ auto struct zzip_file_header header;
161 if (zzip_entry_fread_file_header(entry, &header)
162 && (len = zzip_file_header_namlen(&header)))
164 char *name = malloc(len + 1);
165 if (! name) {
166 return 0;
167 } else {
168 zzip_size_t n = fread(name, 1, len, entry->diskfile);
169 if (n != len) {
170 free (name);
171 return 0;
173 name[n] = '\0';
174 return name;
177 return 0;
178 ____;
179 ____;
182 static int
183 prescan_entry(ZZIP_ENTRY * entry)
185 assert(entry);
186 ___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails(disk_(entry));
187 if (tailsize + 1 > entry->tailalloc)
189 char *newtail = realloc(entry->tail, tailsize + 1);
190 if (! newtail)
191 return ENOMEM;
192 entry->tail = newtail;
193 entry->tailalloc = tailsize + 1;
195 # ifdef SIZE_MAX /* from stdint.h */
196 if (tailsize > (zzip_off_t)(SIZE_MAX)) { return EFBIG; }
197 # endif
198 ___ zzip_size_t readsize = fread(entry->tail, 1, tailsize, entry->diskfile);
199 /* name + comment + extras */
200 if ((zzip_off_t)readsize != tailsize) {
201 return errno;
202 } else {
203 return 0;
204 } ____; ____;
207 static void
208 prescan_clear(ZZIP_ENTRY * entry)
210 assert(entry);
211 if (entry->tail)
212 free(entry->tail);
213 entry->tail = 0;
214 entry->tailalloc = 0;
/* ====================================================================== */
219 /** => zzip_entry_findfile
221 * This function is the first call of all the zip access functions here.
222 * It contains the code to find the first entry of the zip central directory.
223 * Here we require the stdio handle to represent a real zip file where the
224 * disk_trailer is _last_ in the file area, so that its position would be at
225 * a fixed offset from the end of the file area if not for the comment field
226 * allowed to be of variable length (which needs us to do a little search
227 * for the disk_tailer). However, in this simple implementation we disregard
228 * any disk_trailer info telling about multidisk archives, so we just return
229 * a pointer to the first entry in the zip central directory of that file.
231 * For an actual means, we are going to search backwards from the end
232 * of the mmaped block looking for the PK-magic signature of a
233 * disk_trailer. If we see one then we check the rootseek value to
234 * find the first disk_entry of the root central directory. If we find
235 * the correct PK-magic signature of a disk_entry over there then we
236 * assume we are done and we are going to return a pointer to that label.
238 * The return value is a pointer to the first zzip_disk_entry being checked
239 * to be within the bounds of the file area specified by the arguments. If
240 * no disk_trailer was found then null is returned, and likewise we only
241 * accept a disk_trailer with a seekvalue that points to a disk_entry and
242 * both parts have valid PK-magic parts. Beyond some sanity check we try to
243 * catch a common brokeness with zip archives that still allows us to find
244 * the start of the zip central directory.
246 zzip__new__ ZZIP_ENTRY *
247 zzip_entry_findfirst(FILE * disk)
249 if (! disk)
250 return 0;
251 if (fseeko(disk, 0, SEEK_END) == -1)
252 return 0;
253 ___ zzip_off_t disksize = ftello(disk);
254 if (disksize < (zzip_off_t) sizeof(struct zzip_disk_trailer))
255 return 0;
256 /* we read out chunks of 8 KiB in the hope to match disk granularity */
257 ___ zzip_off_t pagesize = PAGESIZE; /* getpagesize() */
258 ___ ZZIP_ENTRY *entry = malloc(sizeof(*entry));
259 if (! entry)
260 return 0;
261 ___ unsigned char *buffer = malloc(pagesize);
262 if (! buffer)
263 goto nomem;
265 assert(pagesize / 2 > (zzip_off_t) sizeof(struct zzip_disk_trailer));
266 /* at each step, we will fread a pagesize block which overlaps with the
267 * previous read by means of pagesize/2 step at the end of the while(1) */
268 ___ zzip_off_t mapoffs = disksize & ~(pagesize - 1);
269 ___ zzip_off_t mapsize = disksize - mapoffs;
270 if (mapoffs && mapsize < pagesize / 2)
272 mapoffs -= pagesize / 2;
273 mapsize += pagesize / 2;
275 assert(mapsize < 3*8192);
276 while (1)
278 if (fseeko(disk, mapoffs, SEEK_SET) == -1)
279 goto error;
280 if (fread(buffer, 1, mapsize, disk) != (zzip_size_t)mapsize)
281 goto error;
282 ___ unsigned char *p =
283 buffer + mapsize - sizeof(struct zzip_disk_trailer);
284 for (; p >= buffer; p--)
286 zzip_off_t root; /* (struct zzip_disk_entry*) */
287 if (zzip_disk_trailer_check_magic(p))
289 root = zzip_disk_trailer_rootseek((struct zzip_disk_trailer *)
291 if (root > disksize - (long) sizeof(struct zzip_disk_trailer))
293 /* first disk_entry is after the disk_trailer? can't be! */
294 struct zzip_disk_trailer *trailer =
295 (struct zzip_disk_trailer *) p;
296 zzip_off_t rootsize = zzip_disk_trailer_rootsize(trailer);
297 if (rootsize > mapoffs)
298 continue;
299 /* a common brokeness that can be fixed: we just assume the
300 * central directory was written directly before : */
301 root = mapoffs - rootsize;
303 } else if (zzip_disk64_trailer_check_magic(p))
305 struct zzip_disk64_trailer *trailer =
306 (struct zzip_disk64_trailer *) p;
307 if (sizeof(zzip_off_t) < 8)
308 return 0;
309 root = zzip_disk64_trailer_rootseek(trailer);
310 } else
311 continue;
313 assert(0 <= root && root < mapsize);
314 if (fseeko(disk, root, SEEK_SET) == -1)
315 goto error;
316 if (fread(disk_(entry), 1, sizeof(*disk_(entry)), disk)
317 != sizeof(*disk_(entry))) goto error;
318 if (zzip_disk_entry_check_magic(entry))
320 free(buffer);
321 entry->headseek = root;
322 entry->diskfile = disk;
323 entry->disksize = disksize;
324 if (prescan_entry(entry))
325 goto nomem;
326 return entry;
329 ____;
330 if (! mapoffs)
331 break;
332 assert(mapsize >= pagesize / 2);
333 mapoffs -= pagesize / 2; /* mapsize += pagesize/2; */
334 mapsize = pagesize; /* if (mapsize > pagesize) ... */
335 if (disksize - mapoffs > 64 * 1024)
336 break;
338 error:
339 free(buffer);
340 nomem:
341 free(entry);
342 ____;
343 ____;
344 ____;
345 ____;
346 ____;
347 ____;
348 return 0;
351 /** => zzip_entry_findfile
353 * This function takes an existing "entry" in the central root directory
354 * (e.g. from zzip_entry_findfirst) and moves it to point to the next entry.
355 * On error it returns 0, otherwise the old entry. If no further match is
356 * found then null is returned and the entry already free()d. If you want
357 * to stop searching for matches before that case then please call
358 * => zzip_entry_free on the cursor struct ZZIP_ENTRY.
360 zzip__new__ ZZIP_ENTRY *
361 zzip_entry_findnext(ZZIP_ENTRY * _zzip_restrict entry)
363 if (! entry)
364 return entry;
365 if (! zzip_disk_entry_check_magic(entry))
366 goto err;
367 ___ zzip_off_t seek =
368 entry->headseek + zzip_disk_entry_sizeto_end(disk_(entry));
369 if (seek + (zzip_off_t) sizeof(*disk_(entry)) > entry->disksize)
370 goto err;
372 if (fseeko(entry->diskfile, seek, SEEK_SET) == -1)
373 goto err;
374 if (fread(disk_(entry), 1, sizeof(*disk_(entry)), entry->diskfile)
375 != sizeof(*disk_(entry))) goto err;
376 entry->headseek = seek;
377 if (! zzip_disk_entry_check_magic(entry))
378 goto err;
379 if (prescan_entry(entry))
380 goto err;
381 return entry;
382 err:
383 zzip_entry_free(entry);
384 return 0;
385 ____;
388 /** => zzip_entry_findfile
389 * this function releases the malloc()ed areas needed for zzip_entry, the
390 * pointer is invalid afterwards. This function has #define synonyms of
391 * zzip_entry_findlast(), zzip_entry_findlastfile(), zzip_entry_findlastmatch()
394 zzip_entry_free(ZZIP_ENTRY * entry)
396 if (! entry)
397 return 0;
398 prescan_clear(entry);
399 free(entry);
400 return 1;
403 /** search for files in the (fseeko) zip central directory
405 * This function is given a filename as an additional argument, to find the
406 * disk_entry matching a given filename. The compare-function is usually
407 * strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used.
408 * - use null as argument for "old"-entry when searching the first
409 * matching entry, otherwise the last returned value if you look for other
410 * entries with a special "compare" function (if null then a doubled search
411 * is rather useless with this variant of _findfile). If no further entry is
412 * found then null is returned and any "old"-entry gets already free()d.
414 zzip__new__ ZZIP_ENTRY *
415 zzip_entry_findfile(FILE * disk, char *filename,
416 ZZIP_ENTRY * _zzip_restrict entry, zzip_strcmp_fn_t compare)
418 if (! filename || ! disk)
419 return 0;
420 if (! entry)
421 entry = zzip_entry_findfirst(disk);
422 else
423 entry = zzip_entry_findnext(entry);
425 if (! compare)
426 compare = (zzip_strcmp_fn_t) (strcmp);
428 for (; entry; entry = zzip_entry_findnext(entry))
430 /* filenames within zip files are often not null-terminated! */
431 char *realname = zzip_entry_strdup_name(entry);
432 if (! realname)
433 continue;
434 if (! compare(filename, realname))
436 free(realname);
437 return entry;
438 } else
440 free(realname);
441 continue;
444 return 0;
447 /** => zzip_entry_findfile
449 * This function uses a compare-function with an additional argument
450 * and it is called just like fnmatch(3) from POSIX.2 AD:1993), i.e.
451 * the argument filespec first and the ziplocal filename second with
452 * the integer-flags put in as third to the indirect call. If the
453 * platform has fnmatch available then null-compare will use that one
454 * and otherwise we fall back to mere strcmp, so if you need fnmatch
455 * searching then please provide an implementation somewhere else.
456 * - use null as argument for "after"-entry when searching the first
457 * matching entry, or the last disk_entry return-value to find the
458 * next entry matching the given filespec. If no further entry is
459 * found then null is returned and any "old"-entry gets already free()d.
461 zzip__new__ ZZIP_ENTRY *
462 zzip_entry_findmatch(FILE * disk, char *filespec,
463 ZZIP_ENTRY * _zzip_restrict entry,
464 zzip_fnmatch_fn_t compare, int flags)
466 if (! filespec || ! disk)
467 return 0;
468 if (! entry)
469 entry = zzip_entry_findfirst(disk);
470 else
471 entry = zzip_entry_findnext(entry);
473 if (! compare)
474 compare = (zzip_fnmatch_fn_t) _zzip_fnmatch;
476 for (; entry; entry = zzip_entry_findnext(entry))
478 /* filenames within zip files are often not null-terminated! */
479 char *realname = zzip_entry_strdup_name(entry);
480 if (! realname)
481 continue;
482 if (! compare(filespec, realname, flags))
484 free(realname);
485 return entry;
486 } else
488 free(realname);
489 continue;
492 return 0;
/* ====================================================================== */
498 * typedef struct zzip_disk_file ZZIP_ENTRY_FILE;
500 struct zzip_entry_file /* : zzip_file_header */
502 struct zzip_file_header header; /* fopen detected header */
503 ZZIP_ENTRY *entry; /* fopen entry */
504 zzip_off_t data; /* for stored blocks */
505 zzip_size_t avail; /* memorized for checks on EOF */
506 zzip_size_t compressed; /* compressed flag and datasize */
507 zzip_size_t dataoff; /* offset from data start */
508 z_stream zlib; /* for inflated blocks */
509 unsigned char buffer[PAGESIZE]; /* work buffer for inflate algorithm */
512 /** open a file within a zip disk for reading
514 * This function does take an "entry" argument and copies it (or just takes
515 * it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That
516 * structure contains also a zlib buffer for decoding. This function does
517 * seek to the file_header of the given "entry" and validates it for the
518 * data buffer following it. We do also prefetch some data from the data
519 * buffer thereby trying to match the disk pagesize for faster access later.
520 * The => zzip_entry_fread will then read in chunks of pagesizes which is
521 * the size of the internal readahead buffer. If an error occurs then null
522 * is returned.
524 zzip__new__ ZZIP_ENTRY_FILE *
525 zzip_entry_fopen(ZZIP_ENTRY * entry, int takeover)
527 if (! entry)
528 return 0;
529 if (! takeover)
531 ZZIP_ENTRY *found = malloc(sizeof(*entry));
532 if (! found)
533 return 0;
534 memcpy(found, entry, sizeof(*entry)); /* prescan_copy */
535 found->tail = malloc(found->tailalloc);
536 if (! found->tail)
537 { free (found); return 0; }
538 memcpy(found->tail, entry->tail, entry->tailalloc);
539 entry = found;
541 ___ ZZIP_ENTRY_FILE *file = malloc(sizeof(*file));
542 if (! file)
543 goto fail1;
544 file->entry = entry;
545 if (! zzip_entry_fread_file_header(entry, &file->header))
546 goto fail2;
547 file->avail = zzip_file_header_usize(&file->header);
548 file->data = zzip_entry_data_offset(entry);
549 file->dataoff = 0;
551 if (! file->avail || zzip_file_header_data_stored(&file->header))
552 { file->compressed = 0; return file; }
554 file->compressed = zzip_file_header_csize(&file->header);
555 file->zlib.opaque = 0;
556 file->zlib.zalloc = Z_NULL;
557 file->zlib.zfree = Z_NULL;
559 ___ zzip_off_t seek = file->data;
560 seek += sizeof(file->buffer);
561 seek -= seek & (sizeof(file->buffer) - 1);
562 assert(file->data < seek); /* pre-read to next PAGESIZE boundary... */
563 if (fseeko(file->entry->diskfile, file->data + file->dataoff, SEEK_SET) == -1)
564 goto fail2;
565 file->zlib.next_in = file->buffer;
566 file->zlib.avail_in = fread(file->buffer, 1, seek - file->data,
567 file->entry->diskfile);
568 file->dataoff += file->zlib.avail_in;
569 ____;
571 if (! zzip_file_header_data_deflated(&file->header)
572 || inflateInit2(&file->zlib, -MAX_WBITS) != Z_OK)
573 goto fail2;
575 return file;
576 fail2:
577 free(file);
578 fail1:
579 zzip_entry_free(entry);
580 return 0;
581 ____;
584 /** => zzip_entry_fopen
586 * This function opens a file found by name, so it does a search into
587 * the zip central directory with => zzip_entry_findfile and whatever
588 * is found first is given to => zzip_entry_fopen
590 zzip__new__ ZZIP_ENTRY_FILE *
591 zzip_entry_ffile(FILE * disk, char *filename)
593 ZZIP_ENTRY *entry = zzip_entry_findfile(disk, filename, 0, 0);
594 if (! entry)
595 return 0;
596 return zzip_entry_fopen(entry, 1);
600 /** => zzip_entry_fopen
602 * This function reads more bytes into the output buffer specified as
603 * arguments. The return value is null on eof or error, the stdio-like
604 * interface can not distinguish between these so you need to check
605 * with => zzip_entry_feof for the difference.
607 zzip_size_t
608 zzip_entry_fread(void *ptr, zzip_size_t sized, zzip_size_t nmemb,
609 ZZIP_ENTRY_FILE * file)
611 if (! file)
612 return 0;
613 ___ zzip_size_t size = sized * nmemb;
614 if (! file->compressed)
616 if (size > file->avail)
617 size = file->avail;
618 if (fread(ptr, 1, size, file->entry->diskfile) != size) return 0;
619 file->dataoff += size;
620 file->avail -= size;
621 return size;
624 file->zlib.avail_out = size;
625 file->zlib.next_out = ptr;
626 ___ zzip_size_t total_old = file->zlib.total_out;
627 while (1)
629 if (! file->zlib.avail_in)
631 size = file->compressed - file->dataoff;
632 if (size > sizeof(file->buffer))
633 size = sizeof(file->buffer);
634 /* fseek (file->data + file->dataoff, file->entry->diskfile); */
635 file->zlib.avail_in = fread(file->buffer, 1, size,
636 file->entry->diskfile);
637 file->zlib.next_in = file->buffer;
638 file->dataoff += file->zlib.avail_in;
640 if (! file->zlib.avail_in)
641 return 0;
643 ___ int err = inflate(&file->zlib, Z_NO_FLUSH);
644 if (err == Z_STREAM_END)
645 file->avail = 0;
646 else if (err == Z_OK)
647 file->avail -= file->zlib.total_out - total_old;
648 else
649 return 0;
650 ____;
651 if (file->zlib.avail_out && ! file->zlib.avail_in)
652 continue;
653 return file->zlib.total_out - total_old;
655 ____;
656 ____;
659 /** => zzip_entry_fopen
660 * This function releases any zlib decoder info needed for decompression
661 * and dumps the ZZIP_ENTRY_FILE struct then.
664 zzip_entry_fclose(ZZIP_ENTRY_FILE * file)
666 if (! file)
667 return 0;
668 if (file->compressed)
669 inflateEnd(&file->zlib);
670 zzip_entry_free(file->entry);
671 free(file);
672 return 0;
675 /** => zzip_entry_fopen
677 * This function allows to distinguish an error from an eof condition.
678 * Actually, if we found an error but we did already reach eof then we
679 * just keep on saying that it was an eof, so the app can just continue.
682 zzip_entry_feof(ZZIP_ENTRY_FILE * file)
684 return ! file || ! file->avail;