iv.vfs: don't turn "w+" mode to "r+" mode, lol
[iv.d.git] / arcz.d
blobf4d31922e4c78e406991aa5de6eaa5e676ba062d
1 /** ARZ chunked archive format processor.
3 * This module provides `std.stdio.File`-like interface to ARZ archives.
5 * Copyright: Copyright Ketmar Dark, 2016
7 * License: Boost License 1.0
8 */
9 module iv.arcz /*is aliced*/;
10 import iv.alice;
12 // use Balz compressor if available
13 static if (__traits(compiles, { import iv.balz; })) enum arcz_has_balz = true; else enum arcz_has_balz = false;
14 static if (__traits(compiles, { import iv.zopfli; })) enum arcz_has_zopfli = true; else enum arcz_has_zopfli = false;
15 static if (arcz_has_balz) import iv.balz;
16 static if (arcz_has_zopfli) import iv.zopfli;
18 // comment this to free packed chunk buffer right after using
19 // i.e. `AZFile` will allocate new block for each new chunk
20 //version = arcz_use_more_memory;
22 public import core.stdc.stdio : SEEK_SET, SEEK_CUR, SEEK_END;
25 // ////////////////////////////////////////////////////////////////////////// //
26 /// ARZ archive accessor. Use this to open ARZ archives, and open packed files from ARZ archives.
27 public struct ArzArchive {
28 private:
29 static assert(usize.sizeof >= (void*).sizeof);
30 private import core.stdc.stdio : FILE, fopen, fclose, fread, fseek;
31 private import etc.c.zlib;
/// Location of one chunk inside the archive file.
static struct ChunkInfo {
  uint ofs;    /// chunk offset from the start of the archive file
  uint pksize; /// stored size; when it equals the archive chunk size, the chunk is stored unpacked
}
/// Directory record of one file stored in the archive.
static struct FileInfo {
  string name;   /// file name as recorded in the index
  uint chunk;    /// number of the chunk where the file starts
  uint chunkofs; /// offset of the first file byte inside that chunk (unpacked)
  uint size;     /// unpacked file size, in bytes
}
/// Shared, reference-counted archive state.
/// It is allocated with the C allocator and referenced through `usize` handles.
static struct Nfo {
  uint rc = 1;             // reference counter
  ChunkInfo[] chunks;      // chunk table
  FileInfo[string] files;  // directory, keyed by file name
  uint chunkSize;          // unpacked size of a full chunk
  uint lastChunkSize;      // unpacked size of the last chunk
  bool useBalz;            // `true`: Balz-packed archive, `false`: zlib-packed
  FILE* afl;               // archive file; kept opened while the info block lives

  @disable this (this); // copying is forbidden

  /// Drop one reference from the info block addressed by `me` (0 is ignored).
  /// When the last reference goes away, the archive file is closed and the block is freed.
  static void decRef (usize me) {
    if (!me) return;
    auto self = cast(Nfo*)me;
    assert(self.rc);
    if (--self.rc != 0) return;
    // that was the last reference: release everything
    import core.memory : GC;
    import core.stdc.stdlib : free;
    if (self.afl !is null) fclose(self.afl);
    self.chunks.destroy;
    self.files.destroy;
    self.afl = null;
    GC.removeRange(cast(void*)self/*, Nfo.sizeof*/);
    free(self);
    debug(arcz_rc) { import core.stdc.stdio : printf; printf("Nfo %p freed\n", self); }
  }
}
usize nfop; // address of the shared `Nfo`, stored as an integer to hide it from GC

/// Shared info block behind `nfop` (null when no archive is opened).
private @property Nfo* nfo () {
  pragma(inline, true);
  return cast(Nfo*)nfop;
}

/// Release our reference to the shared info block and forget it.
void decRef () {
  pragma(inline, true);
  Nfo.decRef(nfop);
  nfop = 0;
}
/// Read one little-endian 32-bit unsigned integer from `fl`.
/// Throws: Exception if `fl` is null or fewer than four bytes could be read.
static uint readUint (FILE* fl) {
  if (fl is null) throw new Exception("cannot read from closed file");
  uint raw;
  immutable got = fread(&raw, 1, raw.sizeof, fl);
  if (got != raw.sizeof) throw new Exception("file reading error");
  version(LittleEndian) {
    // file byte order matches host byte order
    return raw;
  } else version(BigEndian) {
    import core.bitop : bswap;
    return bswap(raw);
  } else {
    static assert(0, "wtf?!");
  }
}
/// Read one byte from `fl` and return it widened to `uint`.
/// Throws: Exception if `fl` is null or the byte could not be read.
static uint readUbyte (FILE* fl) {
  if (fl is null) throw new Exception("cannot read from closed file");
  ubyte b;
  immutable got = fread(&b, 1, b.sizeof, fl);
  if (got != b.sizeof) throw new Exception("file reading error");
  return b;
}
/// Fill the whole `buf` with bytes read from `fl`; an empty buffer is a no-op.
/// Throws: Exception if `fl` is null or the buffer could not be filled completely.
static void readBuf (FILE* fl, void[] buf) {
  if (buf.length == 0) return;
  if (fl is null) throw new Exception("cannot read from closed file");
  immutable got = fread(buf.ptr, 1, buf.length, fl);
  if (got != buf.length) throw new Exception("file reading error");
}
/// Allocate an array of `mem` items of type `T` with the C allocator.
///
/// Params:
///   clear = when `true`, memory comes from `calloc` (zero-filled); otherwise from `malloc`
///   mem = number of items to allocate; must not be zero
///
/// Returns: pointer to the first item; struct items are initialized with `T.init`.
/// Calls `onOutOfMemoryError` when the allocation fails or the byte size does not fit in `size_t`.
static T* xalloc(T, bool clear=true) (uint mem) if (T.sizeof > 0) {
  import core.exception : onOutOfMemoryError;
  import core.stdc.stdlib : calloc, malloc;
  assert(mem != 0);
  // refuse requests whose byte size would overflow
  if (mem > size_t.max/T.sizeof) onOutOfMemoryError();
  static if (clear) {
    auto res = cast(T*)calloc(mem, T.sizeof);
  } else {
    auto res = cast(T*)malloc(cast(size_t)mem*T.sizeof);
  }
  if (res is null) onOutOfMemoryError();
  static if (is(T == struct)) {
    import core.stdc.string : memcpy;
    static immutable T i = T.init;
    // `res` is typed, so `res+idx` steps by whole items (stepping a `void*` would move by bytes)
    foreach (immutable idx; 0..mem) memcpy(res+idx, &i, T.sizeof);
  }
  debug(arcz_alloc) { import core.stdc.stdio : printf; printf("allocated %u bytes at %p\n", cast(uint)(mem*T.sizeof), res); }
  return res;
}
/// Release memory obtained from `xalloc`; a null pointer is ignored.
static void xfree(T) (T* ptr) {
  import core.stdc.stdlib : free;
  if (ptr is null) return;
  debug(arcz_alloc) { import core.stdc.stdio : printf; printf("freing at %p\n", ptr); }
  free(ptr);
}
/// Pick the smallest Balz dictionary size (in bits) that covers `blockSize` bytes;
/// `Balz.MaxDictBits` is returned when no allowed size is big enough.
static if (arcz_has_balz) static ubyte balzDictSize (uint blockSize) {
  for (uint bits = Balz.MinDictBits; bits <= Balz.MaxDictBits; ++bits) {
    if ((1U<<bits) >= blockSize) return cast(ubyte)bits;
  }
  return Balz.MaxDictBits;
}
/// Unpack exactly `destlen` bytes of Balz-packed data.
///
/// Params:
///   dest = output buffer, at least `destlen` bytes
///   destlen = number of unpacked bytes wanted
///   src = packed data
///   srclen = number of packed bytes available at `src`
///   blocksize = unpacked block size, used to select the Balz dictionary size
///
/// Throws: Exception if fewer than `destlen` bytes were produced.
static if (arcz_has_balz) static void unpackBlockBalz (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize) {
  Unbalz bz;
  bz.reinit(balzDictSize(blocksize));
  int ipos, opos;
  auto dc = bz.decompress(
    // reader: feed packed bytes
    (buf) {
      import core.stdc.string : memcpy;
      if (ipos >= srclen) return 0;
      // the remaining input is bounded by the PACKED size, not by the unpacked one
      uint rd = srclen-ipos;
      if (rd > buf.length) rd = cast(uint)buf.length;
      memcpy(buf.ptr, src+ipos, rd);
      ipos += rd;
      return rd;
    },
    // writer: store unpacked bytes, silently dropping anything beyond `destlen`
    (buf) {
      //if (opos+buf.length > destlen) throw new Exception("error unpacking archive");
      uint wr = destlen-opos;
      if (wr > buf.length) wr = cast(uint)buf.length;
      if (wr > 0) {
        import core.stdc.string : memcpy;
        memcpy(dest+opos, buf.ptr, wr);
        opos += wr;
      }
    },
    // unpack length
    destlen
  );
  if (opos != destlen) throw new Exception("error unpacking archive");
}
/// Unpack exactly `destlen` bytes of zlib-packed data from `src` to `dest`.
/// `blocksize` is not used by the zlib unpacker.
/// Throws: Exception if zlib cannot be initialized, reports an error,
/// or the stream ends before `destlen` bytes were produced.
static void unpackBlockZLib (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize) {
  z_stream zs;
  zs.avail_in = 0;
  zs.avail_out = 0;
  // initialize unpacker
  if (inflateInit2(&zs, 15) != Z_OK) throw new Exception("can't initialize zlib");
  scope(exit) inflateEnd(&zs);
  zs.next_in = cast(typeof(zs.next_in))src;
  zs.avail_in = srclen;
  zs.next_out = cast(typeof(zs.next_out))dest;
  zs.avail_out = destlen;
  // inflate until the output buffer is full or the packed stream ends
  for (;;) {
    if (zs.avail_out == 0) break;
    immutable rc = inflate(&zs, Z_SYNC_FLUSH);
    if (rc == Z_STREAM_END) break;
    if (rc != Z_OK) throw new Exception("error unpacking archive");
  }
  // a short stream is an error: we were asked for exactly `destlen` bytes
  if (zs.avail_out != 0) throw new Exception("error unpacking archive");
}
/// Unpack exactly `destlen` bytes with the unpacker selected by `useBalz`.
/// Throws: Exception on unpacking errors, or when Balz is requested but was not compiled in.
static void unpackBlock (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize, bool useBalz) {
  if (!useBalz) {
    unpackBlockZLib(dest, destlen, src, srclen, blocksize);
    return;
  }
  static if (arcz_has_balz) {
    unpackBlockBalz(dest, destlen, src, srclen, blocksize);
  } else {
    throw new Exception("no Balz support was compiled in ArcZ");
  }
}
216 public:
/// Create another handle to the archive `arc` refers to (if any).
this (in ArzArchive arc) {
  assert(nfop == 0);
  nfop = arc.nfop;
  if (nfop != 0) nfo.rc += 1;
}

/// Postblit: the copy shares the info block, so account for one more reference.
this (this) {
  if (nfop != 0) nfo.rc += 1;
}

/// Drop our reference on destruction.
~this () {
  close();
}
/// Make this handle refer to the same archive as `arc`, releasing whatever it referred to before.
void opAssign (in ArzArchive arc) {
  // take the new reference before dropping the old one
  if (arc.nfop != 0) (cast(Nfo*)arc.nfop).rc += 1;
  decRef();
  nfop = arc.nfop;
}
/// Detach from the archive; the shared state is released together with its last reference.
void close () {
  decRef();
}

/// Directory of the opened archive, keyed by file name; `null` when nothing is opened.
@property FileInfo[string] files () {
  if (nfop == 0) return null;
  return nfo.files;
}
/// Open archive file `filename` and read its directory.
///
/// Any previously opened archive is closed first. On success the archive file
/// stays opened and is owned by the shared info block.
///
/// Throws: Exception if the file cannot be opened or read, or its header or index is invalid.
void openArchive (const(char)[] filename) {
  debug/*(arcz)*/ import core.stdc.stdio : printf;
  FILE* fl = null;
  scope(exit) if (fl !is null) fclose(fl);
  close();
  if (filename.length == 0) throw new Exception("cannot open unnamed archive file");
  // `fopen` wants a zero-terminated name: build one on the stack for short names, on the heap for long ones
  if (filename.length < 2048) {
    import core.stdc.stdlib : alloca;
    auto tfn = (cast(char*)alloca(filename.length+1))[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    fl = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    auto tfnp = cast(char*)malloc(filename.length+1);
    // test the raw pointer: a slice built over a null pointer still has a non-zero length
    if (tfnp !is null) {
      scope(exit) free(tfnp);
      tfnp[0..filename.length] = filename[];
      tfnp[filename.length] = 0;
      fl = fopen(tfnp, "rb");
    }
  }
  if (fl is null) throw new Exception("cannot open archive file '"~filename.idup~"'");
  // header: signature, version, index location
  char[4] sign;
  bool useBalz;
  readBuf(fl, sign[]);
  if (sign != "CZA2") throw new Exception("invalid archive file '"~filename.idup~"'");
  switch (readUbyte(fl)) {
    case 0: useBalz = false; break;
    case 1: useBalz = true; break;
    default: throw new Exception("invalid version of archive file '"~filename.idup~"'");
  }
  uint indexofs = readUint(fl); // index offset in file
  uint pkidxsize = readUint(fl); // packed index size
  uint idxsize = readUint(fl); // unpacked index size
  if (pkidxsize == 0 || idxsize == 0 || indexofs == 0) throw new Exception("invalid archive file '"~filename.idup~"'");
  // now read index
  ubyte* idxbuf = null;
  scope(exit) xfree(idxbuf);
  {
    auto pib = xalloc!ubyte(pkidxsize);
    scope(exit) xfree(pib);
    if (fseek(fl, indexofs, 0) < 0) throw new Exception("seek error in archive file '"~filename.idup~"'");
    readBuf(fl, pib[0..pkidxsize]);
    idxbuf = xalloc!ubyte(idxsize);
    unpackBlock(idxbuf, idxsize, pib, pkidxsize, idxsize, useBalz);
  }

  // parse index and build structures
  uint idxbufpos = 0;

  ubyte getUbyte () {
    if (idxsize-idxbufpos < ubyte.sizeof) throw new Exception("invalid index for archive file '"~filename.idup~"'");
    return idxbuf[idxbufpos++];
  }

  uint getUint () {
    if (idxsize-idxbufpos < uint.sizeof) throw new Exception("invalid index for archive file '"~filename.idup~"'");
    version(BigEndian) {
      import core.bitop : bswap;
      uint v = *cast(uint*)(idxbuf+idxbufpos);
      idxbufpos += 4;
      return bswap(v);
    } else version(LittleEndian) {
      uint v = *cast(uint*)(idxbuf+idxbufpos);
      idxbufpos += 4;
      return v;
    } else {
      static assert(0, "wtf?!");
    }
  }

  void getBuf (void[] buf) {
    if (buf.length > 0) {
      import core.stdc.string : memcpy;
      if (idxsize-idxbufpos < buf.length) throw new Exception("invalid index for archive file '"~filename.idup~"'");
      memcpy(buf.ptr, idxbuf+idxbufpos, buf.length);
      idxbufpos += buf.length;
    }
  }

  // allocate shared info struct
  Nfo* nfo = xalloc!Nfo(1);
  assert(nfo.rc == 1);
  debug(arcz_rc) { import core.stdc.stdio : printf; printf("Nfo %p allocated\n", nfo); }
  scope(failure) decRef();
  nfop = cast(usize)nfo;
  {
    // the info block holds GC-managed arrays, so the GC must scan it
    import core.memory : GC;
    GC.addRange(nfo, Nfo.sizeof);
  }

  // read chunk info and data
  nfo.useBalz = useBalz;
  nfo.chunkSize = getUint;
  auto ccount = getUint; // chunk count
  nfo.lastChunkSize = getUint;
  debug(arcz_dirread) printf("chunk size: %u\nchunk count: %u\nlast chunk size:%u\n", nfo.chunkSize, ccount, nfo.lastChunkSize);
  // the archive format stores 0 here when the last chunk is completely used
  if (nfo.lastChunkSize == 0) nfo.lastChunkSize = nfo.chunkSize;
  if (ccount == 0 || nfo.chunkSize < 1 || nfo.lastChunkSize > nfo.chunkSize) throw new Exception("invalid archive file '"~filename.idup~"'");
  // every chunk record takes 8 index bytes; reject counts the index cannot possibly hold
  if (ccount > (idxsize-idxbufpos)/8) throw new Exception("invalid index for archive file '"~filename.idup~"'");
  nfo.chunks.length = ccount;
  // chunk offsets and sizes
  foreach (ref ci; nfo.chunks) {
    ci.ofs = getUint;
    ci.pksize = getUint;
  }
  // read file count and info
  auto fcount = getUint;
  if (fcount == 0) throw new Exception("empty archive file '"~filename.idup~"'");
  debug(arcz_dirread) printf("file count: %u\n", fcount);
  foreach (immutable _; 0..fcount) {
    uint nameofs = getUint;
    uint namelen = getUint;
    if (namelen == 0) {
      // skip unnamed file
      getUint; // chunk number
      getUint; // offset in chunk
      getUint; // unpacked size
      debug(arcz_dirread) printf("skipped empty file\n");
    } else {
      // subtraction form: `nameofs+namelen` could wrap around
      if (nameofs >= idxsize || namelen > idxsize-nameofs) throw new Exception("invalid index in archive file '"~filename.idup~"'");
      FileInfo fi;
      auto nb = new char[](namelen);
      nb[0..namelen] = (cast(char*)idxbuf)[nameofs..nameofs+namelen];
      fi.name = cast(string)(nb); // it is safe here: `nb` is a fresh private copy
      fi.chunk = getUint; // chunk number
      fi.chunkofs = getUint; // offset in chunk
      fi.size = getUint; // unpacked size
      debug(arcz_dirread) printf("file size: %u\nfile chunk: %u\noffset in chunk:%u; name: [%.*s]\n", fi.size, fi.chunk, fi.chunkofs, cast(uint)fi.name.length, fi.name.ptr);
      nfo.files[fi.name] = fi;
    }
  }
  // transfer archive file ownership
  nfo.afl = fl;
  fl = null;
}
/// Does the opened archive contain a file named `name`? Always `false` when nothing is opened.
bool exists (const(char)[] name) {
  return (nfop != 0 && (name in nfo.files) !is null);
}
/// Open the packed file `name` for reading.
/// Throws: Exception if no archive is opened or it has no such file.
AZFile open (const(char)[] name) {
  if (!nfop) throw new Exception("can't open file from non-opened archive");
  auto entry = name in nfo.files;
  if (entry is null) throw new Exception("can't open file '"~name.idup~"' from archive");
  // the reader state lives in C memory; the returned handle owns its initial reference
  auto zl = xalloc!LowLevelPackedRO(1);
  scope(failure) xfree(zl);
  debug(arcz_rc) { import core.stdc.stdio : printf; printf("Zl %p allocated\n", zl); }
  zl.setup(nfo, entry.chunk, entry.chunkofs, entry.size);
  AZFile fl;
  fl.zlp = cast(usize)zl;
  return fl;
}
398 private:
399 static struct LowLevelPackedRO {
400 private import etc.c.zlib;
402 uint rc = 1;
403 usize nfop; // hide it from GC
/// Archive info block this reader belongs to (the `nfop` handle as a typed pointer).
private @property inout(Nfo*) nfo () inout pure nothrow @trusted @nogc {
  pragma(inline, true);
  return cast(typeof(return))nfop;
}
/// Drop one reference from the reader addressed by `me` (0 is ignored).
/// With the last reference gone, the buffers are freed, the archive reference is
/// released, and the reader itself is freed.
static void decRef (usize me) {
  if (!me) return;
  auto zl = cast(LowLevelPackedRO*)me;
  assert(zl.rc);
  if (--zl.rc != 0) {
    //debug(arcz_rc) { import core.stdc.stdio : printf; printf("Zl %p; rc after decRef is %u\n", zl, zl.rc); }
    return;
  }
  import core.stdc.stdlib : free;
  if (zl.chunkData !is null) free(zl.chunkData);
  version(arcz_use_more_memory) if (zl.pkdata !is null) free(zl.pkdata);
  Nfo.decRef(zl.nfop);
  free(zl);
  debug(arcz_rc) { import core.stdc.stdio : printf; printf("Zl %p freed\n", zl); }
}
423 uint nextchunk; // next chunk to read
424 uint curcpos; // position in current chunk
425 uint curcsize; // number of valid bytes in `chunkData`
426 uint stchunk; // starting chunk
427 uint stofs; // offset in starting chunk
428 uint totalsize; // total file size
429 uint pos; // current file position
430 uint lastrdpos; // last actual read position
431 z_stream zs;
432 ubyte* chunkData; // can be null
433 version(arcz_use_more_memory) {
434 ubyte* pkdata;
435 uint pkdatasize;
438 @disable this (this);
/// Bind a freshly allocated reader to archive `anfo` and to the file described by
/// its starting chunk, offset in that chunk, and unpacked size.
/// Takes one reference on `anfo`.
void setup (Nfo* anfo, uint astchunk, uint astofs, uint asize) {
  assert(anfo !is null);
  assert(rc == 1);
  // remember the archive and keep it alive
  nfop = cast(usize)anfo;
  ++anfo.rc;
  // file geometry
  stchunk = astchunk;
  nextchunk = astchunk;
  //curcpos = 0;
  stofs = astofs;
  totalsize = asize;
}
/// Is the current position at (or past) the end of the file?
@property bool eof () {
  pragma(inline, true);
  return !(pos < totalsize);
}
/// Size of the unpack buffer this file needs: a full chunk, or just `stofs+totalsize`
/// bytes when the whole file fits in one chunk without filling it.
uint justEnoughMemory () pure const nothrow @safe @nogc {
  pragma(inline, true);
  immutable uint full = nfo.chunkSize;
  immutable uint need = stofs+totalsize;
  if (totalsize < full && need < full) return need;
  return full;
}
/// Load chunk number `nextchunk` into `chunkData` and advance `nextchunk`.
///
/// Afterwards `chunkData[0..curcsize]` holds file bytes and `curcpos` is 0; for the
/// starting chunk, the bytes that precede the file are dropped.
///
/// Throws: Exception on archive reading or unpacking errors.
void unpackNextChunk () {
  if (nfop == 0) assert(0, "wtf?!");
  //scope(failure) if (chunkData !is null) { xfree(chunkData); chunkData = null; }
  debug(arcz_unp) { import core.stdc.stdio : printf; printf("unpacking chunk %u\n", nextchunk); }
  // capacity of `chunkData`; never bigger than the archive chunk size
  immutable uint jem = justEnoughMemory;
  // allocate buffer for unpacked data
  if (chunkData is null) {
    // optimize things a little: if our file fits in less than one chunk, allocate "just enough" memory
    chunkData = xalloc!(ubyte, false)(jem);
  }
  auto chunk = &nfo.chunks[nextchunk];
  if (chunk.pksize == nfo.chunkSize) {
    // unpacked chunk, just read it
    debug(arcz_unp) { import core.stdc.stdio : printf; printf(" chunk is not packed\n"); }
    // read no more than the buffer holds: it can be smaller than a full chunk
    if (fseek(nfo.afl, chunk.ofs, 0) < 0) throw new Exception("ARCZ reading error");
    if (fread(chunkData, 1, jem, nfo.afl) != jem) throw new Exception("ARCZ reading error");
    curcsize = jem;
  } else {
    // packed chunk, unpack it
    // allocate buffer for packed data
    version(arcz_use_more_memory) {
      import core.stdc.stdlib : realloc;
      if (pkdatasize < chunk.pksize) {
        import core.exception : onOutOfMemoryError;
        auto newpk = realloc(pkdata, chunk.pksize);
        if (newpk is null) onOutOfMemoryError();
        debug(arcz_alloc) { import core.stdc.stdio : printf; printf("reallocated from %u to %u bytes; %p -> %p\n", cast(uint)pkdatasize, cast(uint)chunk.pksize, pkdata, newpk); }
        pkdata = cast(ubyte*)newpk;
        pkdatasize = chunk.pksize;
      }
      alias pkd = pkdata;
    } else {
      auto pkd = xalloc!(ubyte, false)(chunk.pksize);
      scope(exit) xfree(pkd);
    }
    if (fseek(nfo.afl, chunk.ofs, 0) < 0) throw new Exception("ARCZ reading error");
    if (fread(pkd, 1, chunk.pksize, nfo.afl) != chunk.pksize) throw new Exception("ARCZ reading error");
    uint upsize = (nextchunk == nfo.chunks.length-1 ? nfo.lastChunkSize : nfo.chunkSize); // unpacked chunk size
    immutable uint cksz = upsize; // the unpacker is told the real chunk size, not the clipped one
    if (upsize > jem) upsize = jem;
    debug(arcz_unp) { import core.stdc.stdio : printf; printf(" unpacking %u bytes to %u bytes\n", chunk.pksize, upsize); }
    ArzArchive.unpackBlock(chunkData, upsize, pkd, chunk.pksize, cksz, nfo.useBalz);
    curcsize = upsize;
  }
  curcpos = 0;
  // fix first chunk offset if necessary
  if (nextchunk == stchunk && stofs > 0) {
    // it's easier to just memmove it
    import core.stdc.string : memmove;
    assert(stofs < curcsize);
    memmove(chunkData, chunkData+stofs, curcsize-stofs);
    curcsize -= stofs;
  }
  ++nextchunk; // advance to next chunk
}
/// Make the chunk buffer agree with `pos` after a seek: load the chunk that contains
/// `pos` (unless it is the one already loaded) and point `curcpos` at the wanted byte.
/// Does nothing at end of file, or when `pos` is where the last read stopped.
void syncReadPos () {
  if (pos >= totalsize || pos == lastrdpos) return;
  immutable uint firstPart = nfo.chunkSize-stofs; // number of our bytes in the first chunk
  if (pos < firstPart) {
    // the position lies in the starting chunk
    if (nextchunk == stchunk+1) {
      // the starting chunk is the loaded one: just rewind
      curcpos = 0;
    } else {
      nextchunk = stchunk;
      unpackNextChunk(); // we'll need it anyway
    }
    curcpos += pos;
    lastrdpos = pos;
    return;
  }
  // the position lies in one of the following chunks: find out which one
  immutable uint chunkIdx = (pos-firstPart)/nfo.chunkSize; // counted from the chunk after the starting one
  immutable uint wanted = stchunk+1+chunkIdx;
  immutable uint chunkStart = chunkIdx*nfo.chunkSize+firstPart; // file position of the first byte of that chunk
  if (wanted == nextchunk-1) {
    // that chunk is the loaded one: just rewind
    curcpos = 0;
  } else {
    // read and unpack this chunk
    nextchunk = wanted;
    unpackNextChunk();
  }
  assert(pos >= chunkStart && pos < chunkStart+nfo.chunkSize);
  immutable uint skip = pos-chunkStart;
  lastrdpos = pos;
  curcpos += skip;
}
/// Read up to `count` bytes from the current position into `buf`.
/// Returns: number of bytes read (0 at end of file), or -1 when `buf` is null.
/// Throws: Exception on archive reading or unpacking errors.
int read (void* buf, uint count) {
  import core.stdc.string : memcpy;
  if (buf is null) return -1;
  if (count == 0 || totalsize == 0) return 0;
  if (pos >= totalsize) return 0; // EOF
  syncReadPos();
  assert(lastrdpos == pos);
  // never read past the end of the file
  if (cast(long)pos+count > totalsize) count = totalsize-pos;
  auto res = count;
  while (count > 0) {
    debug(arcz_read) { import core.stdc.stdio : printf; printf("reading %u bytes; pos=%u; lastrdpos=%u; curcpos=%u; curcsize=%u\n", count, pos, lastrdpos, curcpos, curcsize); }
    if (curcpos >= curcsize) {
      unpackNextChunk(); // we want next chunk!
      debug(arcz_read) { import core.stdc.stdio : printf; printf(" *reading %u bytes; pos=%u; lastrdpos=%u; curcpos=%u; curcsize=%u\n", count, pos, lastrdpos, curcpos, curcsize); }
    }
    assert(curcpos < curcsize && curcsize != 0);
    // take what is left in the chunk buffer, but no more than requested
    immutable uint left = curcsize-curcpos;
    int rd = (left >= count ? count : left);
    assert(rd > 0);
    memcpy(buf, chunkData+curcpos, rd);
    buf += rd;
    count -= rd;
    curcpos += rd;
    pos += rd;
    lastrdpos += rd;
  }
  assert(pos == lastrdpos);
  return res;
}
/// Move the file position.
///
/// `origin` is one of `SEEK_SET`, `SEEK_CUR`, `SEEK_END`. For `SEEK_END` a positive
/// offset is treated as 0 and the result never goes before the file start. Positions
/// past the end are clipped to the file size.
///
/// Returns: the new position, or -1 for an unknown origin or a negative resulting position.
long lseek (long ofs, int origin) {
  //TODO: overflow checks
  long target;
  if (origin == SEEK_SET) {
    target = ofs;
  } else if (origin == SEEK_CUR) {
    target = ofs+pos;
  } else if (origin == SEEK_END) {
    long back = (ofs > 0 ? 0 : ofs);
    if (-back > totalsize) back = -cast(long)totalsize;
    target = back+totalsize;
  } else {
    return -1;
  }
  if (target < 0) return -1;
  if (target > totalsize) target = totalsize;
  pos = cast(uint)target;
  return pos;
}
605 // ////////////////////////////////////////////////////////////////////////// //
/// Opened file: a reference-counted handle to a read-only packed file inside an ARZ archive.
public struct AZFile {
private:
  usize zlp; // address of the low-level reader, stored as an integer

  // the low-level reader behind `zlp` (null for a closed file)
  private @property inout(ArzArchive.LowLevelPackedRO)* zl () inout pure nothrow @trusted @nogc { pragma(inline, true); return cast(typeof(return))zlp; }
  // release our reference to the low-level reader
  private void decRef () { pragma(inline, true); ArzArchive.LowLevelPackedRO.decRef(zlp); zlp = 0; }

public:
  /// Create another handle to the file `afl` refers to (if any).
  this (in AZFile afl) {
    assert(zlp == 0);
    zlp = afl.zlp;
    if (zlp) ++zl.rc;
  }

  /// Postblit: the copy shares the reader, so account for one more reference.
  this (this) {
    if (zlp) ++zl.rc;
  }

  ~this () { close(); }

  /// Make this handle refer to the same file as `afl`, releasing the previous one.
  void opAssign (in AZFile afl) {
    // take the new reference before dropping the old one
    if (afl.zlp) {
      auto n = cast(ArzArchive.LowLevelPackedRO*)afl.zlp;
      ++n.rc;
    }
    decRef();
    zlp = afl.zlp;
  }

  /// Close the handle; the reader is released together with its last reference.
  void close () { decRef(); }

  /// Does this handle refer to an opened file?
  @property bool isOpen () const pure nothrow @safe @nogc { pragma(inline, true); return (zlp != 0); }
  /// Unpacked file size (0 for a closed file).
  @property uint size () const pure nothrow @safe @nogc { pragma(inline, true); return (zlp ? zl.totalsize : 0); }
  /// Current file position (0 for a closed file).
  @property uint tell () const pure nothrow @safe @nogc { pragma(inline, true); return (zlp ? zl.pos : 0); }

  /// Move the file position; `origin` is one of `SEEK_SET`, `SEEK_CUR`, `SEEK_END`.
  /// Throws: Exception if the file is closed or the seek is invalid.
  void seek (long ofs, int origin=SEEK_SET) {
    if (!zlp) throw new Exception("can't seek in closed file");
    auto res = zl.lseek(ofs, origin);
    if (res < 0) throw new Exception("seek error");
  }

  private import std.traits : isMutable;

  /// Read up to `buf.length` items into `buf`.
  ///
  /// Returns: the slice of `buf` that was actually filled; it is shorter than `buf`
  /// at end of file, and when the request is bigger than one low-level read can report.
  ///
  /// Throws: Exception if the file is closed, or the number of bytes read is not a
  /// multiple of `T.sizeof`.
  T[] rawRead(T) (T[] buf) if (isMutable!T) {
    if (!zlp) throw new Exception("can't read from closed file");
    if (buf.length == 0) return buf[0..0];
    // the low-level reader takes a `uint` and answers with an `int`:
    // cap the request at the biggest whole number of items that fits
    enum usize maxBytes = int.max-(int.max%T.sizeof);
    usize want = buf.length*T.sizeof;
    if (want > maxBytes) want = maxBytes;
    auto res = zl.read(buf.ptr, cast(uint)want);
    if (res == -1 || res%T.sizeof != 0) throw new Exception("read error");
    return buf[0..res/T.sizeof];
  }
}
664 // ////////////////////////////////////////////////////////////////////////// //
665 /** this class can be used to create archive file.
667 * Example:
668 * --------------------
669 * import std.file, std.path, std.stdio : File;
671 * enum ArcName = "z00.arz";
672 * enum DirName = "experimental-docs";
674 * ubyte[] rdbuf;
675 * rdbuf.length = 65536;
677 * auto arcz = new ArzCreator(ArcName);
678 * long total = 0;
679 * foreach (DirEntry e; dirEntries(DirName, SpanMode.breadth)) {
680 * if (e.isFile) {
681 * assert(e.size < uint.max);
682 * //writeln(e.name);
683 * total += e.size;
684 * string fname = e.name[DirName.length+1..$];
685 * arcz.newFile(fname, cast(uint)e.size);
686 * auto fi = File(e.name);
687 * for (;;) {
688 * auto rd = fi.rawRead(rdbuf[]);
689 * if (rd.length == 0) break;
690 * arcz.rawWrite(rd[]);
694 * arcz.close();
695 * writeln(total, " bytes packed to ", getSize(ArcName), " (", arcz.chunksWritten, " chunks, ", arcz.filesWritten, " files)");
696 * --------------------
698 final class ArzCreator {
699 private import etc.c.zlib;
700 private import core.stdc.stdio : FILE, fopen, fclose, ftell, fseek, fwrite;
702 public:
/// Compression method used for chunks and for the index.
//WARNING! don't change the order!
enum Compressor {
  ZLib,    /// zlib (the default)
  Balz,    /// Balz
  BalzMax, /// Balz, maximum compression
  Zopfli,  /// Zopfli; falls back to zlib when Zopfli support was not compiled in
}
711 private:
/// Record of one chunk already written to the output file.
static struct ChunkInfo {
  uint ofs;    /// chunk offset from the start of the output file
  uint pksize; /// number of bytes stored for the chunk (packed size, or the raw size for a chunk stored unpacked)
}
/// Directory record of one file added to the archive.
static struct FileInfo {
  string name;   /// file name
  uint chunk;    /// number of the chunk where the file starts
  uint chunkofs; /// offset of the first file byte inside that chunk (unpacked)
  uint size;     /// unpacked file size, in bytes
}
724 private:
725 ubyte[] chunkdata;
726 uint cdpos;
727 FILE* arcfl;
728 ChunkInfo[] chunks;
729 FileInfo[] files;
730 uint lastChunkSize;
731 uint statChunks, statFiles;
732 Compressor cpr = Compressor.ZLib;
734 private:
/// Write `v` to the output file as a little-endian 32-bit integer.
/// Throws: Exception if the output file is closed or the write is short.
void writeUint (uint v) {
  if (arcfl is null) throw new Exception("write error");
  version(LittleEndian) {
    // host byte order is already the file byte order
    uint le = v;
  } else version(BigEndian) {
    import core.bitop : bswap;
    uint le = bswap(v);
  } else {
    static assert(0, "wtf?!");
  }
  immutable written = fwrite(&le, 1, le.sizeof, arcfl);
  if (written != le.sizeof) throw new Exception("write error");
}
/// Write one byte to the output file.
/// Throws: Exception if the output file is closed or the byte was not written.
void writeUbyte (ubyte v) {
  if (arcfl is null) throw new Exception("write error");
  immutable written = fwrite(&v, 1, v.sizeof, arcfl);
  if (written != v.sizeof) throw new Exception("write error");
}
/// Write the whole `buf` to the output file; an empty buffer is a no-op.
/// Throws: Exception if the output file is closed or the write is short.
void writeBuf (const(void)[] buf) {
  if (buf.length == 0) return;
  if (arcfl is null) throw new Exception("write error");
  immutable written = fwrite(buf.ptr, 1, buf.length, arcfl);
  if (written != buf.length) throw new Exception("write error");
}
/// Pack `upbuf` with Balz and write the packed bytes to the output file.
/// Maximum compression is requested when the selected compressor is `BalzMax`.
/// Returns: number of packed bytes written.
static if (arcz_has_balz) long writePackedBalz (const(void)[] upbuf) {
  assert(upbuf.length > 0 && upbuf.length < int.max);
  long written = 0;
  int srcpos; // how much of `upbuf` was already given to the packer
  Balz bz;
  bz.reinit(ArzArchive.balzDictSize(cast(uint)upbuf.length));
  bz.compress(
    // reader: hand out the next part of `upbuf`
    (buf) {
      import core.stdc.string : memcpy;
      if (srcpos >= upbuf.length) return 0;
      uint rd = cast(uint)upbuf.length-srcpos;
      if (rd > buf.length) rd = cast(uint)buf.length;
      memcpy(buf.ptr, upbuf.ptr+srcpos, rd);
      srcpos += rd;
      return rd;
    },
    // writer: count packed bytes and pass them to the output file
    (buf) {
      written += buf.length;
      writeBuf(buf[]);
    },
    // max mode
    (cpr == Compressor.BalzMax)
  );
  return written;
}
/// Pack `upbuf` with Zopfli (zlib container format) and write the result to the output file.
/// Returns: number of packed bytes written.
/// Throws: Exception on write errors.
static if (arcz_has_zopfli) long writePackedZopfli (const(void)[] upbuf) {
  void* odata;
  usize osize;
  ZopfliOptions opts;
  ZopfliCompress(opts, ZOPFLI_FORMAT_ZLIB, upbuf.ptr, upbuf.length, &odata, &osize);
  // release the packed buffer even when the write below throws
  scope(exit) ZopfliFree(odata);
  writeBuf(odata[0..osize]);
  return cast(long)osize;
}
/// Pack `upbuf` with zlib (best compression) and write the packed stream to the output file.
/// Returns: number of packed bytes written.
/// Throws: Exception on zlib or write errors.
long writePackedZLib (const(void)[] upbuf) {
  assert(upbuf.length > 0 && upbuf.length < int.max);
  long res = 0;
  z_stream zs;
  ubyte[2048] obuf;
  zs.next_out = obuf.ptr;
  zs.avail_out = cast(uint)obuf.length;
  zs.next_in = null;
  zs.avail_in = 0;
  // initialize packer
  if (deflateInit2(&zs, Z_BEST_COMPRESSION, Z_DEFLATED, 15, 9, 0) != Z_OK) throw new Exception("can't write packed data");
  scope(exit) deflateEnd(&zs);
  zs.next_in = cast(typeof(zs.next_in))upbuf.ptr;
  zs.avail_in = cast(uint)upbuf.length;

  // write out whatever the packer has put in `obuf`, and give it the whole buffer again
  void drain () {
    immutable uint used = cast(uint)obuf.length-zs.avail_out;
    if (used > 0) {
      res += used;
      writeBuf(obuf[0..used]);
    }
    zs.next_out = obuf.ptr;
    zs.avail_out = cast(uint)obuf.length;
  }

  // feed all input
  while (zs.avail_in > 0) {
    if (zs.avail_out == 0) drain();
    if (deflate(&zs, Z_NO_FLUSH) != Z_OK) throw new Exception("zlib compression error");
  }
  // finish the stream; this must run even when `obuf` happens to be empty now,
  // or the stream would be left without its final block and trailer
  for (;;) {
    if (zs.avail_out == 0) drain();
    immutable err = deflate(&zs, Z_FINISH);
    if (err == Z_STREAM_END) break;
    if (err != Z_OK) throw new Exception("zlib compression error");
  }
  drain();
  return res;
}
/// Pack `upbuf` with the selected compressor and write it to the output file.
/// Returns: size of packed data written.
/// Throws: Exception on compression or write errors, when the packed size does not
/// fit in 32 bits, or when a Balz compressor is selected without Balz support.
uint writePackedBuf (const(void)[] upbuf) {
  assert(upbuf.length > 0 && upbuf.length < int.max);
  long res = 0;
  final switch (cpr) {
    case Compressor.ZLib:
      res = writePackedZLib(upbuf);
      break;
    case Compressor.Balz:
    case Compressor.BalzMax:
      static if (arcz_has_balz) {
        res = writePackedBalz(upbuf);
        break;
      } else {
        // this has to be thrown: a bare `new` would continue into the next case
        throw new Exception("no Balz support was compiled in ArcZ");
      }
    case Compressor.Zopfli:
      static if (arcz_has_zopfli) {
        res = writePackedZopfli(upbuf);
      } else {
        // no Zopfli: fall back to zlib
        res = writePackedZLib(upbuf);
      }
      break;
  }
  if (res > uint.max) throw new Exception("output archive too big");
  return cast(uint)res;
}
/// Write the pending chunk data (if any) to the output file as one chunk.
///
/// The chunk is packed first; a completely filled chunk that does not get smaller is
/// stored raw instead. `lastChunkSize` always ends up as the unpacked size of the most
/// recently written chunk.
///
/// Throws: Exception on write or seek errors, or when the archive outgrows 32-bit offsets.
void flushData () {
  if (cdpos > 0) {
    ChunkInfo ci;
    auto pos = ftell(arcfl);
    if (pos < 0 || pos >= uint.max) throw new Exception("output archive too big");
    ci.ofs = cast(uint)pos;
    auto wlen = writePackedBuf(chunkdata[0..cdpos]);
    ci.pksize = wlen;
    if (cdpos == chunkdata.length && ci.pksize >= chunkdata.length) {
      // wow, this chunk is unpackable: overwrite the packed data with the raw chunk
      //{ import std.stdio; writeln("unpackable chunk found!"); }
      if (fseek(arcfl, pos, 0) < 0) throw new Exception("can't seek in output file");
      writeBuf(chunkdata[0..cdpos]);
      version(Posix) {
        // cut off what remains of the (longer) packed data
        import core.stdc.stdio : fileno;
        import core.sys.posix.unistd : ftruncate;
        pos = ftell(arcfl);
        if (pos < 0 || pos >= uint.max) throw new Exception("output archive too big");
        if (ftruncate(fileno(arcfl), cast(uint)pos) < 0) throw new Exception("error truncating output file");
      }
      ci.pksize = cdpos;
    }
    // record the size of every flushed chunk, full ones included: the index must never
    // carry a stale or zero size for the last chunk
    lastChunkSize = cast(uint)cdpos;
    cdpos = 0;
    chunks ~= ci;
  } else {
    // nothing is pending, so the previously written chunk (if any) was a full one
    lastChunkSize = cast(uint)chunkdata.length;
  }
}
/// Finish the archive: flush pending data, write the packed index after the last
/// chunk, and patch the index location and sizes into the file header.
/// Throws: Exception on write or seek errors, or when the archive outgrows 32-bit offsets.
void closeArc () {
  flushData();
  // write index
  //assert(ftell(arcfl) > 0 && ftell(arcfl) < uint.max);
  assert(chunkdata.length < uint.max);
  assert(chunks.length < uint.max);
  assert(files.length < uint.max);
  // create index in memory
  ubyte[] index;

  // append a little-endian 32-bit integer to the index
  void putUint (uint v) {
    index ~= v&0xff;
    index ~= (v>>8)&0xff;
    index ~= (v>>16)&0xff;
    index ~= (v>>24)&0xff;
  }

  // append one byte to the index
  void putUbyte (ubyte v) {
    index ~= v;
  }

  // append raw bytes to the index
  void putBuf (const(void)[] buf) {
    assert(buf.length > 0);
    index ~= (cast(const(ubyte)[])buf)[];
  }

  // chunk size
  putUint(cast(uint)chunkdata.length);
  // chunk count
  putUint(cast(uint)chunks.length);
  // last chunk size
  putUint(lastChunkSize); // 0: last chunk is full
  // chunk offsets and sizes
  foreach (ref ci; chunks) {
    putUint(ci.ofs);
    putUint(ci.pksize);
  }
  // file count
  putUint(cast(uint)files.length);
  // names are stored right after the fixed-size file records (five uints each)
  uint nbofs = cast(uint)index.length+cast(uint)files.length*(5*4);
  //uint nbofs = 0;
  // files
  foreach (ref fi; files) {
    assert(fi.name.length > 0 && fi.name.length <= 16384);
    // name offset in index, name length
    putUint(nbofs);
    putUint(cast(uint)fi.name.length);
    nbofs += cast(uint)fi.name.length+1; // put zero byte there to ease C interfacing
    //putBuf(fi.name[]);
    // chunk number
    putUint(fi.chunk);
    // offset in unpacked chunk
    putUint(fi.chunkofs);
    // unpacked size
    putUint(fi.size);
  }
  // names
  foreach (ref fi; files) {
    putBuf(fi.name[]);
    putUbyte(0); // this means nothing, it is here just for convenience (hello, C!)
  }
  assert(index.length < uint.max);
  auto cpos = ftell(arcfl);
  if (cpos < 0 || cpos > uint.max) throw new Exception("output archive too big");
  // write packed index
  debug(arcz_writer) { import core.stdc.stdio : printf; printf("index size: %u\n", cast(uint)index.length); }
  auto pkisz = writePackedBuf(index[]);
  debug(arcz_writer) { import core.stdc.stdio : printf; printf("packed index size: %u\n", cast(uint)pkisz); }
  // write index info; it starts at offset 5, right after the signature and the version byte
  if (fseek(arcfl, 5, 0) < 0) throw new Exception("seek error");
  // index offset in file (range-checked above, so the narrowing cast is safe)
  writeUint(cast(uint)cpos);
  // packed index size
  writeUint(pkisz);
  // unpacked index size
  writeUint(cast(uint)index.length);
  // done
  statChunks = cast(uint)chunks.length;
  statFiles = cast(uint)files.length;
}
980 public:
/// Create the archive file `fname` and write a provisional header.
///
/// Params:
///   fname = name of the output file; an existing file is overwritten
///   chunkSize = unpacked chunk size in bytes
///   acpr = compressor to use
///
/// Throws: Exception if the file cannot be created or written, or when a Balz
/// compressor is requested without Balz support.
this (const(char)[] fname, uint chunkSize=256*1024, Compressor acpr=Compressor.ZLib) {
  import std.internal.cstring;
  assert(chunkSize > 0 && chunkSize < 32*1024*1024); // arbitrary limit
  immutable bool wantBalz = (acpr == Compressor.Balz || acpr == Compressor.BalzMax);
  static if (!arcz_has_balz) {
    if (wantBalz) throw new Exception("no Balz support was compiled in ArcZ");
  }
  static if (!arcz_has_zopfli) {
    // Zopfli requests are served by zlib, so this is not an error
    //if (acpr == Compressor.Zopfli) throw new Exception("no Zopfli support was compiled in ArcZ");
  }
  cpr = acpr;
  arcfl = fopen(fname.tempCString, "wb");
  if (arcfl is null) throw new Exception("can't create output file '"~fname.idup~"'");
  cdpos = 0;
  chunkdata.length = chunkSize;
  scope(failure) { fclose(arcfl); arcfl = null; }
  // header: signature, version (1: balz, 0: zlib), then index info patched on close
  writeBuf("CZA2");
  writeUbyte(wantBalz ? 1 : 0);
  foreach (immutable _; 0..3) writeUint(0); // offset to index, packed index size, unpacked index size
}
/// Finish the archive if it is still opened.
~this () {
  close();
}

/// Finish and close the archive, then drop all buffered state.
/// Calling it on an already closed creator only resets the state.
/// Throws: Exception if finishing the archive fails; the output file is closed anyway.
void close () {
  if (arcfl !is null) {
    try {
      closeArc();
    } finally {
      fclose(arcfl);
      arcfl = null;
    }
  }
  cdpos = 0;
  lastChunkSize = 0;
  files = null;
  chunks = null;
  chunkdata = null;
}
// the counters below are valid after closing

/// Number of chunks written to the archive.
@property uint chunksWritten () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return statChunks;
}

/// Number of files recorded in the archive.
@property uint filesWritten () const pure nothrow @safe @nogc {
  pragma(inline, true);
  return statFiles;
}
/// Start a new file in the archive; its `size` bytes of data are expected to follow
/// through `rawWrite`.
///
/// Params:
///   name = file name to record in the index
///   size = unpacked file size, in bytes
void newFile (string name, uint size) {
  FileInfo fi;
  assert(name.length <= 255);
  // a completely filled chunk is normally flushed by the next write; flush it now, so
  // the file is recorded at offset 0 of a fresh chunk, not one past the end of a full one
  if (cdpos > 0 && cdpos == chunkdata.length) flushData();
  fi.name = name;
  fi.chunk = cast(uint)chunks.length;
  fi.chunkofs = cast(uint)cdpos;
  fi.size = size;
  files ~= fi;
}
/// Append the bytes of `buffer` to the data of the current file.
/// Data is collected in the chunk buffer; every filled chunk is packed and written out.
/// Throws: Exception on write errors, or when the archive is already closed.
void rawWrite(T) (const(T)[] buffer) {
  if (buffer.length > 0) {
    // a closed creator has no chunk buffer: the loop below would never make progress
    if (arcfl is null || chunkdata.length == 0) throw new Exception("write error");
    auto src = cast(const(ubyte)*)buffer.ptr;
    auto len = buffer.length*T.sizeof;
    while (len > 0) {
      if (cdpos == chunkdata.length) flushData();
      if (cdpos < chunkdata.length) {
        // copy as much as fits in the rest of the chunk buffer
        auto wr = chunkdata.length-cdpos;
        if (wr > len) wr = len;
        chunkdata[cdpos..cdpos+wr] = src[0..wr];
        cdpos += cast(uint)wr;
        len -= wr;
        src += wr;
      }
    }
  }
}
1055 // ////////////////////////////////////////////////////////////////////////// //
1056 /* arcz file format:
1057 header
1058 ======
1059 db 'CZA2' ; signature
1060 db version ; 0: zlib; 1: balz
1061 dd indexofs ; offset to packed index
1062 dd pkindexsz ; size of packed index
1063 dd upindexsz ; size of unpacked index
1066 index
1067 =====
1068 dd chunksize ; unpacked chunk size in bytes
1069 dd chunkcount ; number of chunks in file
1070 dd lastchunksz ; size of last chunk (it may be incomplete); 0: last chunk is completely used (all `chunksize` bytes)
1072 then chunk offsets and sizes follow (one pair per chunk):
1073 dd chunkofs ; from file start
1074 dd pkchunksz ; size of (possibly packed) chunk data; if it equals to `chunksize`, this chunk is not packed
1076 then file list follows:
1077 dd filecount ; number of files in archive
1079 then file info follows:
1080 dd nameofs ; (in index)
1081 dd namelen ; length of name (can't be 0)
1082 dd firstchunk ; chunk where file starts
1083 dd firstofs ; offset in first chunk (unpacked) where file starts
1084 dd filesize ; unpacked file size
1086 then name buffer follows -- just bytes