1 /** ARZ chunked archive format processor.
3 * This module provides `std.stdio.File`-like interface to ARZ archives.
5 * Copyright: Copyright Ketmar Dark, 2016
7 * License: Boost License 1.0
9 module iv
.arcz
/*is aliced*/;
// Optional packers: each flag is `true` only when the corresponding module
// can be imported at compile time; the module is then imported for real.
enum arcz_has_balz = __traits(compiles, { import iv.balz; });
enum arcz_has_zopfli = __traits(compiles, { import iv.zopfli; });
enum arcz_has_dlzma = __traits(compiles, { import iv.dlzma; });

static if (arcz_has_balz) import iv.balz;
static if (arcz_has_zopfli) import iv.zopfli;
static if (arcz_has_dlzma) import iv.dlzma;
// comment this out to free the packed chunk buffer right after use,
// i.e. `AZFile` will allocate a new block for each new chunk
22 //version = arcz_use_more_memory;
24 public import core
.stdc
.stdio
: SEEK_SET
, SEEK_CUR
, SEEK_END
;
/// Exception type this module throws to report archive errors.
class ArczException : Exception {
  /// Passes the message and the throw-site information to `Exception` unchanged.
  this (
    string msg,
    string file=__FILE__,
    usize line=__LINE__,
    Throwable next=null
  ) pure nothrow @safe @nogc
  {
    super(msg, file, line, next);
  }
}
33 // ////////////////////////////////////////////////////////////////////////// //
34 /// ARZ archive accessor. Use this to open ARZ archives, and open packed files from ARZ archives.
35 public struct ArzArchive
{
37 static assert(usize
.sizeof
>= (void*).sizeof
);
38 private import core
.stdc
.stdio
: FILE
, fopen
, fclose
, fread
, fseek
;
39 private import etc
.c
.zlib
;
40 private import core
.sync
.mutex
: Mutex
;
48 static struct ChunkInfo
{
49 uint ofs
; // offset in file
50 uint pksize
; // packed chunk size (same as chunk size: chunk is unpacked)
53 static struct FileInfo
{
56 uint chunkofs
; // offset of first file byte in unpacked chunk
57 uint size
; // unpacked file size
61 uint rc
= 1; // refcounter
63 FileInfo
[string
] files
;
67 FILE
* afl
; // archive file, we'll keep it opened
70 @disable this (this); // no copies!
72 static void freeMe (Nfo
* nfo
) {
73 import core
.memory
: GC
;
74 import core
.stdc
.stdlib
: free
;
75 if (nfo
.afl
!is null) fclose(nfo
.afl
);
76 if (nfo
.fileMutex
!is null) nfo
.fileMutex
.destroy
;
80 GC
.removeRange(cast(void*)nfo
/*, Nfo.sizeof*/);
82 debug(arcz_rc
) { import core
.stdc
.stdio
: printf
; printf("Nfo %p freed\n", nfo
); }
85 static void decRef (usize me
) {
88 auto nfo
= cast(Nfo
*)me
;
90 if (--nfo
.rc
== 0) freeMe(nfo
);
95 usize nfop
; // hide it from GC
97 private @property inout(Nfo
)* nfo () pure inout nothrow @trusted @nogc { pragma(inline
, true); return cast(Nfo
*)nfop
; }
98 void decRef () { pragma(inline
, true); Nfo
.decRef(nfop
); nfop
= 0; }
/// Reads one 32-bit value from `fl`; the bytes are swapped on big-endian hosts,
/// i.e. the on-disk order is little-endian.
/// Throws: ArczException if `fl` is null or fewer than 4 bytes could be read.
static uint readUint (FILE* fl) {
  if (fl is null) throw new ArczException("cannot read from closed file");
  uint value;
  immutable got = fread(&value, 1, value.sizeof, fl);
  if (got != value.sizeof) throw new ArczException("file reading error");
  version(LittleEndian) {
    // host byte order already matches the file
    return value;
  } else version(BigEndian) {
    import core.bitop : bswap;
    return bswap(value);
  } else {
    static assert(0, "wtf?!");
  }
}
/// Reads a single byte from `fl` and returns it widened to `uint`.
/// Throws: ArczException if `fl` is null or the byte could not be read.
static uint readUbyte (FILE* fl) {
  if (fl is null) throw new ArczException("cannot read from closed file");
  ubyte octet;
  immutable got = fread(&octet, 1, octet.sizeof, fl);
  if (got != octet.sizeof) throw new ArczException("file reading error");
  return octet;
}
/// Fills `buf` completely from `fl`.
/// An empty `buf` is a no-op (the file handle is not even checked then).
/// Throws: ArczException if `fl` is null or the read comes up short.
static void readBuf (FILE* fl, void[] buf) {
  if (buf.length == 0) return;
  if (fl is null) throw new ArczException("cannot read from closed file");
  immutable got = fread(buf.ptr, 1, buf.length, fl);
  if (got != buf.length) throw new ArczException("file reading error");
}
/** Allocates room for `mem` elements of `T` with the C allocator (not the GC).
 *
 * Params:
 *   clear = `true`: memory comes from `calloc()` (zero-filled);
 *           `false`: memory comes from `malloc()` (contents undefined)
 *   mem = number of elements, must not be zero
 *
 * For struct types every element is then overwritten with `T.init`,
 * regardless of `clear`.
 *
 * Returns: pointer to the first element; release it with `xfree()`.
 * Calls `onOutOfMemoryError()` if the request cannot be satisfied.
 */
static T* xalloc(T, bool clear=true) (uint mem) if (T.sizeof > 0) {
  import core.exception : onOutOfMemoryError;
  assert(mem != 0);
  // `mem*T.sizeof` must not wrap, or `malloc()` would hand out a too-small block
  if (mem > size_t.max/T.sizeof) onOutOfMemoryError();
  static if (clear) {
    import core.stdc.stdlib : calloc;
    void* res = calloc(mem, T.sizeof);
  } else {
    import core.stdc.stdlib : malloc;
    void* res = malloc(mem*T.sizeof);
  }
  if (res is null) onOutOfMemoryError();
  static if (is(T == struct)) {
    import core.stdc.string : memcpy;
    static immutable T i = T.init;
    // index in units of `T`: arithmetic on the raw `void*` advances by single
    // bytes, which made the copies overlap for `mem > 1`
    foreach (immutable idx; 0..mem) memcpy(cast(T*)res+idx, &i, T.sizeof);
  }
  debug(arcz_alloc) { import core.stdc.stdio : printf; printf("allocated %u bytes at %p\n", cast(uint)(mem*T.sizeof), res); }
  return cast(T*)res;
}
/// Releases a block with the C `free()`; a null pointer is ignored.
static void xfree(T) (T* ptr) {
  if (ptr is null) return;
  import core.stdc.stdlib : free;
  debug(arcz_alloc) { import core.stdc.stdio : printf; printf("freing at %p\n", ptr); }
  free(ptr);
}
/// Returns the smallest dictionary size (in bits, within
/// `Balz.MinDictBits..Balz.MaxDictBits`) whose window covers `blockSize`
/// bytes, or `Balz.MaxDictBits` if even the largest window is too small.
static if (arcz_has_balz) static ubyte balzDictSize (uint blockSize) {
  ubyte bits = Balz.MinDictBits;
  while (bits < Balz.MaxDictBits && (1U<<bits) < blockSize) ++bits;
  return bits;
}
/** Unpacks exactly `destlen` bytes of Balz-compressed data.
 *
 * Params:
 *   dest = output buffer, at least `destlen` bytes
 *   destlen = number of unpacked bytes expected
 *   src = packed data
 *   srclen = size of the packed data in bytes
 *   blocksize = block size used to select the dictionary size
 *
 * Throws: ArczException if the decoder produced a different number of bytes.
 */
static if (arcz_has_balz) static void unpackBlockBalz (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize) {
  Unbalz bz;
  bz.reinit(balzDictSize(blocksize));
  int ipos, opos;
  auto dc = bz.decompress(
    // reader
    (buf) {
      import core.stdc.string : memcpy;
      if (ipos >= srclen) return 0;
      // what is left of the *packed* input; this used `destlen`, which let
      // the copy run past the end of `src`
      uint rd = srclen-ipos;
      if (rd > buf.length) rd = cast(uint)buf.length;
      memcpy(buf.ptr, src+ipos, rd);
      ipos += rd;
      return rd;
    },
    // writer
    (buf) {
      // output beyond `destlen` is dropped; the size check below reports the error
      uint wr = destlen-opos;
      if (wr > buf.length) wr = cast(uint)buf.length;
      if (wr > 0) {
        import core.stdc.string : memcpy;
        memcpy(dest+opos, buf.ptr, wr);
        opos += wr;
      }
    },
    // unpack length
    destlen
  );
  if (opos != destlen) throw new ArczException("error unpacking archive");
}
205 // unpack exactly `destlen` bytes
206 static if (arcz_has_dlzma
) static void unpackBlockLzma (void* dest
, uint destlen
, const(void)* src
, uint srclen
, uint blocksize
) {
207 if (srclen
< LZMA_PROPS_SIZE
) throw new ArczException("error unpacking LZMA data");
208 if (srclen
== LZMA_PROPS_SIZE
) {
209 if (destlen
== 0) return;
210 throw new ArczException("error unpacking LZMA data");
212 const(ubyte)* srcBytes
= cast(const(ubyte)*)src
;
213 ubyte* destBytes
= cast(ubyte*)dest
;
215 LzmaDec_Init(&lzdec
);
216 if (LzmaDec_Allocate(&lzdec
, srcBytes
, LZMA_PROPS_SIZE
, &lzmaDefAllocator
) != SZ_OK
) {
217 throw new ArczException("error allocating LZMA decoder");
219 scope(exit
) LzmaDec_Free(&lzdec
, &lzmaDefAllocator
);
220 srcBytes
+= LZMA_PROPS_SIZE
;
221 srclen
-= LZMA_PROPS_SIZE
;
223 usize dlen
= destlen
;
226 immutable dres
= LzmaDec_DecodeToBuf(&lzdec
, destBytes
, &dlen
, srcBytes
, &slen
, LZMA_FINISH_ANY
, &status
);
227 if (dres
!= SZ_OK || dlen
!= destlen
) throw new ArczException("error unpacking LZMA data");
230 static void unpackBlockZLib (void* dest
, uint destlen
, const(void)* src
, uint srclen
, uint blocksize
) {
234 // initialize unpacker
235 if (inflateInit2(&zs
, 15) != Z_OK
) throw new ArczException("can't initialize zlib");
236 scope(exit
) inflateEnd(&zs
);
237 zs
.next_in
= cast(typeof(zs
.next_in
))src
;
238 zs
.avail_in
= srclen
;
239 zs
.next_out
= cast(typeof(zs
.next_out
))dest
;
240 zs
.avail_out
= destlen
;
241 while (zs
.avail_out
> 0) {
242 auto err
= inflate(&zs
, Z_SYNC_FLUSH
);
243 if (err
!= Z_STREAM_END
&& err
!= Z_OK
) throw new ArczException("error unpacking archive");
244 if (err
== Z_STREAM_END
) break;
246 if (zs
.avail_out
!= 0) throw new ArczException("error unpacking archive");
/// Dispatches to the unpacker matching `packer`; all other arguments are
/// passed through unchanged.
/// Throws: ArczException if support for the requested packer was not compiled in
/// (or whatever the selected unpacker throws).
static void unpackBlock (void* dest, uint destlen, const(void)* src, uint srclen, uint blocksize, in Packer packer) {
  final switch (packer) {
    case Packer.Zlib:
      unpackBlockZLib(dest, destlen, src, srclen, blocksize);
      return;
    case Packer.Balz:
      static if (!arcz_has_balz) {
        throw new ArczException("no Balz support was compiled in ArcZ");
      } else {
        unpackBlockBalz(dest, destlen, src, srclen, blocksize);
        return;
      }
    case Packer.Lzma:
      static if (!arcz_has_dlzma) {
        throw new ArczException("no LZMA support was compiled in ArcZ");
      } else {
        unpackBlockLzma(dest, destlen, src, srclen, blocksize);
        return;
      }
  }
}
272 this (in ArzArchive arc
) {
279 pragma(inline
, true);
283 ~this () { pragma(inline
, true); close(); }
285 void opAssign (in ArzArchive arc
) {
287 auto n
= cast(Nfo
*)arc
.nfop
;
294 void close () { pragma(inline
, true); decRef(); }
296 @property FileInfo
[string
] files () nothrow @trusted @nogc { return (nfop ? nfo
.files
: null); }
/** Opens ARZ archive `filename` and loads its directory.
 *
 * The header is checked, the packed index is read and unpacked, and the chunk
 * table and file list are stored in a freshly allocated shared `Nfo`. The
 * opened `FILE*` is handed over to that `Nfo` only after everything
 * succeeded; on any failure the file is closed and the `Nfo` is released.
 *
 * Throws: ArczException if the file cannot be opened or is not a valid archive.
 */
void openArchive (const(char)[] filename) {
  debug/*(arcz)*/ import core.stdc.stdio : printf;
  FILE* fl = null;
  scope(exit) if (fl !is null) fclose(fl); // does nothing once ownership was transferred
  close();
  if (filename.length == 0) throw new ArczException("cannot open unnamed archive file");
  // `fopen()` needs a zero-terminated copy of the name
  if (filename.length < 2048) {
    import core.stdc.stdlib : alloca;
    auto tfn = (cast(char*)alloca(filename.length+1))[0..filename.length+1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    fl = fopen(tfn.ptr, "rb");
  } else {
    import core.stdc.stdlib : malloc, free;
    auto tfn = cast(char*)malloc(filename.length+1);
    if (tfn !is null) {
      scope(exit) free(tfn);
      // the name must be copied on this path too, otherwise `fopen()` is
      // handed uninitialized memory
      tfn[0..filename.length] = filename[];
      tfn[filename.length] = 0;
      fl = fopen(tfn, "rb");
    }
  }
  if (fl is null) throw new ArczException("cannot open archive file '"~filename.idup~"'");
  char[4] sign;
  Packer packer;
  readBuf(fl, sign[]);
  if (sign != "CZA2") throw new ArczException("invalid archive file '"~filename.idup~"'");
  switch (readUbyte(fl)) {
    case 0: packer = Packer.Zlib; break;
    case 1: packer = Packer.Balz; break;
    case 2: packer = Packer.Lzma; break;
    default: throw new ArczException("invalid version of archive file '"~filename.idup~"'");
  }
  uint indexofs = readUint(fl); // index offset in file
  uint pkidxsize = readUint(fl); // packed index size
  uint idxsize = readUint(fl); // unpacked index size
  if (pkidxsize == 0 || idxsize == 0 || indexofs == 0) throw new ArczException("invalid archive file '"~filename.idup~"'");
  // now read index
  ubyte* idxbuf = null;
  scope(exit) xfree(idxbuf);
  {
    auto pib = xalloc!ubyte(pkidxsize);
    scope(exit) xfree(pib);
    if (fseek(fl, indexofs, 0) < 0) throw new ArczException("seek error in archive file '"~filename.idup~"'");
    readBuf(fl, pib[0..pkidxsize]);
    idxbuf = xalloc!ubyte(idxsize);
    unpackBlock(idxbuf, idxsize, pib, pkidxsize, idxsize, packer);
  }

  // parse index and build structures
  uint idxbufpos = 0;

  // fetches the next little-endian 32-bit value from the unpacked index
  uint getUint () {
    if (idxsize-idxbufpos < uint.sizeof) throw new ArczException("invalid index for archive file '"~filename.idup~"'");
    // assembled bytewise: needs no endianness switch and no aligned access
    immutable uint v =
      idxbuf[idxbufpos]|
      (idxbuf[idxbufpos+1]<<8)|
      (idxbuf[idxbufpos+2]<<16)|
      (cast(uint)idxbuf[idxbufpos+3]<<24);
    idxbufpos += 4;
    return v;
  }

  // allocate shared info struct
  Nfo* nfo = xalloc!Nfo(1);
  assert(nfo.rc == 1);
  debug(arcz_rc) { import core.stdc.stdio : printf; printf("Nfo %p allocated\n", nfo); }
  scope(failure) decRef();
  nfop = cast(usize)nfo;
  {
    import core.memory : GC;
    GC.addRange(nfo, Nfo.sizeof);
  }
  nfo.fileMutex = new Mutex();

  // read chunk info and data
  nfo.packer = packer;
  nfo.chunkSize = getUint;
  auto ccount = getUint; // chunk count
  nfo.lastChunkSize = getUint;
  debug(arcz_dirread) printf("chunk size: %u\nchunk count: %u\nlast chunk size:%u\n", nfo.chunkSize, ccount, nfo.lastChunkSize);
  if (ccount == 0 || nfo.chunkSize < 1 || nfo.lastChunkSize < 1 || nfo.lastChunkSize > nfo.chunkSize) throw new ArczException("invalid archive file '"~filename.idup~"'");
  // every chunk record takes 8 index bytes; refuse counts the index cannot
  // hold before allocating the table
  if (ccount > (idxsize-idxbufpos)/8) throw new ArczException("invalid archive file '"~filename.idup~"'");
  nfo.chunks.length = ccount;
  // chunk offsets and sizes
  foreach (ref ci; nfo.chunks) {
    ci.ofs = getUint;
    ci.pksize = getUint;
  }
  // read file count and info
  auto fcount = getUint;
  if (fcount == 0) throw new ArczException("empty archive file '"~filename.idup~"'");
  debug(arcz_dirread) printf("file count: %u\n", fcount);
  foreach (immutable _; 0..fcount) {
    uint nameofs = getUint;
    uint namelen = getUint;
    if (namelen == 0) {
      // skip unnamed file
      getUint; // chunk number
      getUint; // offset in chunk
      getUint; // unpacked size
      debug(arcz_dirread) printf("skipped empty file\n");
    } else {
      // written so that the comparison cannot wrap around
      if (nameofs >= idxsize || namelen > idxsize-nameofs) throw new ArczException("invalid index in archive file '"~filename.idup~"'");
      FileInfo fi;
      auto nb = new char[](namelen);
      nb[0..namelen] = (cast(char*)idxbuf)[nameofs..nameofs+namelen];
      fi.name = cast(string)(nb); // it is safe here
      fi.chunk = getUint; // chunk number
      fi.chunkofs = getUint; // offset in chunk
      fi.size = getUint; // unpacked size
      debug(arcz_dirread) printf("file size: %u\nfile chunk: %u\noffset in chunk:%u; name: [%.*s]\n", fi.size, fi.chunk, fi.chunkofs, cast(uint)fi.name.length, fi.name.ptr);
      nfo.files[fi.name] = fi;
    }
  }
  // transfer archive file ownership
  nfo.afl = fl;
  fl = null;
}
/// Returns `true` if an archive is open and contains a file called `name`.
bool exists (const(char)[] name) {
  if (!nfop) return false;
  return ((name in nfo.files) !is null);
}
/// Opens packed file `name` and returns an `AZFile` reading from it.
/// The reader state is allocated and set up while the archive's file mutex is held.
/// Throws: ArczException if no archive is open or `name` is not in it.
AZFile open (const(char)[] name) {
  if (!nfop) throw new ArczException("can't open file from non-opened archive");
  auto fi = name in nfo.files;
  if (fi is null) throw new ArczException("can't open file '"~name.idup~"' from archive");
  nfo.fileMutex.lock();
  scope(exit) nfo.fileMutex.unlock();
  auto zl = xalloc!LowLevelPackedRO(1);
  scope(failure) xfree(zl);
  debug(arcz_rc) { import core.stdc.stdio : printf; printf("Zl %p allocated\n", zl); }
  zl.setup(nfo, fi.chunk, fi.chunkofs, fi.size);
  AZFile fl;
  fl.zlp = cast(usize)zl;
  return fl;
}
459 static struct LowLevelPackedRO
{
460 private import etc
.c
.zlib
;
463 usize nfop
; // hide it from GC
465 private @property inout(Nfo
*) nfo () inout pure nothrow @trusted @nogc { pragma(inline
, true); return cast(typeof(return))nfop
; }
466 static void decRef (usize me
) {
468 auto zl
= cast(LowLevelPackedRO
*)me
;
471 import core
.stdc
.stdlib
: free
;
472 if (zl
.chunkData
!is null) free(zl
.chunkData
);
473 version(arcz_use_more_memory
) if (zl
.pkdata
!is null) free(zl
.pkdata
);
476 debug(arcz_rc
) { import core
.stdc
.stdio
: printf
; printf("Zl %p freed\n", zl
); }
478 //debug(arcz_rc) { import core.stdc.stdio : printf; printf("Zl %p; rc after decRef is %u\n", zl, zl.rc); }
483 uint nextchunk
; // next chunk to read
484 uint curcpos
; // position in current chunk
485 uint curcsize
; // number of valid bytes in `chunkData`
486 uint stchunk
; // starting chunk
487 uint stofs
; // offset in starting chunk
488 uint totalsize
; // total file size
489 uint pos
; // current file position
490 uint lastrdpos
; // last actual read position
492 ubyte* chunkData
; // can be null
493 version(arcz_use_more_memory
) {
498 @disable this (this);
500 void setup (Nfo
* anfo
, uint astchunk
, uint astofs
, uint asize
) {
501 assert(anfo
!is null);
503 nfop
= cast(usize
)anfo
;
505 nextchunk
= stchunk
= astchunk
;
511 @property bool eof () pure nothrow @safe @nogc { pragma(inline
, true); return (pos
>= totalsize
); }
513 // return less than chunk size if our file fits in one non-full chunk completely
514 uint justEnoughMemory () pure const nothrow @safe @nogc {
515 pragma(inline
, true);
517 return nfo
.chunkSize
;
519 return (totalsize
< nfo
.chunkSize
&& stofs
+totalsize
< nfo
.chunkSize ? stofs
+totalsize
: nfo
.chunkSize
);
/** Loads chunk number `nextchunk` into `chunkData` and advances `nextchunk`.
 *
 * A chunk whose stored size equals the archive chunk size is read as is,
 * anything else goes through `ArzArchive.unpackBlock()`. For the file's
 * starting chunk the data is shifted so that `chunkData[0]` is the first
 * byte of the file. Afterwards `curcpos` is 0 and `curcsize` is the number
 * of valid bytes in `chunkData`.
 *
 * Throws: ArczException on I/O errors, on unpacking errors, and when the
 * chunk number or start offset taken from the index is out of range.
 */
void unpackNextChunk () {
  if (nfop == 0) assert(0, "wtf?!");
  debug(arcz_unp) { import core.stdc.stdio : printf; printf("unpacking chunk %u\n", nextchunk); }
  // the chunk number ultimately comes from the archive index: don't trust it
  if (nextchunk >= nfo.chunks.length) throw new ArczException("ARCZ reading error");
  // `chunkData` is (or will be) exactly this big
  immutable uint jem = justEnoughMemory;
  // allocate buffer for unpacked data
  if (chunkData is null) {
    // optimize things a little: if our file fits in less than one chunk, allocate "just enough" memory
    chunkData = xalloc!(ubyte, false)(jem);
  }
  auto chunk = &nfo.chunks[nextchunk];
  if (chunk.pksize == nfo.chunkSize) {
    // unpacked chunk, just read it
    debug(arcz_unp) { import core.stdc.stdio : printf; printf(" chunk is not packed\n"); }
    // never read more than the buffer holds: reading the full chunk size into
    // a "just enough" buffer would overflow it
    immutable uint rdsize = (nfo.chunkSize > jem ? jem : nfo.chunkSize);
    {
      nfo.fileMutex.lock();
      scope(exit) nfo.fileMutex.unlock();
      if (fseek(nfo.afl, chunk.ofs, 0) < 0) throw new ArczException("ARCZ reading error");
      if (fread(chunkData, 1, rdsize, nfo.afl) != rdsize) throw new ArczException("ARCZ reading error");
    }
    curcsize = rdsize;
  } else {
    // packed chunk, unpack it
    // allocate buffer for packed data
    version(arcz_use_more_memory) {
      import core.stdc.stdlib : realloc;
      if (pkdatasize < chunk.pksize) {
        import core.exception : onOutOfMemoryError;
        auto newpk = realloc(pkdata, chunk.pksize);
        if (newpk is null) onOutOfMemoryError();
        debug(arcz_alloc) { import core.stdc.stdio : printf; printf("reallocated from %u to %u bytes; %p -> %p\n", cast(uint)pkdatasize, cast(uint)chunk.pksize, pkdata, newpk); }
        pkdata = cast(ubyte*)newpk;
        pkdatasize = chunk.pksize;
      }
      auto pkd = pkdata;
    } else {
      auto pkd = xalloc!(ubyte, false)(chunk.pksize);
      scope(exit) xfree(pkd);
    }
    {
      nfo.fileMutex.lock();
      scope(exit) nfo.fileMutex.unlock();
      if (fseek(nfo.afl, chunk.ofs, 0) < 0) throw new ArczException("ARCZ reading error");
      if (fread(pkd, 1, chunk.pksize, nfo.afl) != chunk.pksize) throw new ArczException("ARCZ reading error");
    }
    uint upsize = (nextchunk == nfo.chunks.length-1 ? nfo.lastChunkSize : nfo.chunkSize); // unpacked chunk size
    immutable uint cksz = upsize;
    if (upsize > jem) upsize = jem;
    debug(arcz_unp) { import core.stdc.stdio : printf; printf(" unpacking %u bytes to %u bytes\n", chunk.pksize, upsize); }
    ArzArchive.unpackBlock(chunkData, upsize, pkd, chunk.pksize, cksz, nfo.packer);
    curcsize = upsize;
  }
  curcpos = 0;
  // fix first chunk offset if necessary
  if (nextchunk == stchunk && stofs > 0) {
    // it's easier to just memmove it
    import core.stdc.string : memmove;
    // a start offset outside the chunk would make the length below wrap around
    if (stofs >= curcsize) throw new ArczException("ARCZ reading error");
    memmove(chunkData, chunkData+stofs, curcsize-stofs);
    curcsize -= stofs;
  }
  ++nextchunk; // advance to next chunk
}
587 void syncReadPos () {
588 if (pos
>= totalsize || pos
== lastrdpos
) return;
589 immutable uint fcdata
= nfo
.chunkSize
-stofs
; // number of our bytes in the first chunk
590 // does our pos lie in the first chunk?
593 if (nextchunk
!= stchunk
+1) {
595 unpackNextChunk(); // we'll need it anyway
604 // find the chunk we want
605 uint npos
= pos
-fcdata
;
606 uint xblock
= stchunk
+1+npos
/nfo
.chunkSize
;
607 uint curcstart
= (xblock
-(stchunk
+1))*nfo
.chunkSize
+fcdata
;
608 if (xblock
!= nextchunk
-1) {
609 // read and unpack this chunk
616 assert(pos
>= curcstart
&& pos
< curcstart
+nfo
.chunkSize
);
617 uint skip
= pos
-curcstart
;
/** Reads up to `count` bytes from the current position into `buf`.
 *
 * Returns: the number of bytes stored (0 at end of file or for an empty
 * request), or -1 if `buf` is null. Because the result is an `int`, a single
 * call never transfers more than `int.max` bytes.
 */
int read (void* buf, uint count) {
  if (buf is null) return -1;
  if (count == 0 || totalsize == 0) return 0;
  if (pos >= totalsize) return 0; // EOF
  syncReadPos();
  assert(lastrdpos == pos);
  // `pos < totalsize` here, so the subtraction cannot wrap
  if (count > totalsize-pos) count = totalsize-pos;
  // a larger count would come back as a negative (i.e. "error") result
  if (count > int.max) count = int.max;
  immutable int res = cast(int)count;
  while (count > 0) {
    debug(arcz_read) { import core.stdc.stdio : printf; printf("reading %u bytes; pos=%u; lastrdpos=%u; curcpos=%u; curcsize=%u\n", count, pos, lastrdpos, curcpos, curcsize); }
    import core.stdc.string : memcpy;
    if (curcpos >= curcsize) {
      unpackNextChunk(); // we want next chunk!
      debug(arcz_read) { import core.stdc.stdio : printf; printf(" *reading %u bytes; pos=%u; lastrdpos=%u; curcpos=%u; curcsize=%u\n", count, pos, lastrdpos, curcpos, curcsize); }
    }
    assert(curcpos < curcsize && curcsize != 0);
    immutable uint avail = curcsize-curcpos;
    immutable uint rd = (avail >= count ? count : avail);
    assert(rd > 0);
    memcpy(buf, chunkData+curcpos, rd);
    curcpos += rd;
    pos += rd;
    lastrdpos += rd;
    buf += rd;
    count -= rd;
  }
  assert(pos == lastrdpos);
  return res;
}
/** Moves the file position.
 *
 * Params:
 *   ofs = offset relative to `origin`
 *   origin = `SEEK_SET`, `SEEK_CUR` or `SEEK_END`
 *
 * Positions past the end are clamped to the file size; for `SEEK_END`
 * positive offsets count as 0 and offsets before the start are clamped to it.
 *
 * Returns: the new position, or -1 for an unknown `origin` or a negative
 * resulting position.
 */
long lseek (long ofs, int origin) {
  switch (origin) {
    case SEEK_SET: break;
    case SEEK_CUR:
      // saturate instead of wrapping around to a negative offset
      if (ofs > long.max-pos) ofs = long.max; else ofs += pos;
      break;
    case SEEK_END:
      if (ofs > 0) ofs = 0;
      // compare without negating `ofs`: `-long.min` is not representable
      if (ofs < -cast(long)totalsize) ofs = -cast(long)totalsize;
      ofs += totalsize;
      break;
    default:
      return -1;
  }
  if (ofs < 0) return -1;
  if (ofs > totalsize) ofs = totalsize;
  pos = cast(uint)ofs;
  return pos;
}
673 // ////////////////////////////////////////////////////////////////////////// //
675 public struct AZFile
{
679 private @property inout(ArzArchive
.LowLevelPackedRO
)* zl () inout pure nothrow @trusted @nogc { pragma(inline
, true); return cast(typeof(return))zlp
; }
680 private void decRef () { pragma(inline
, true); ArzArchive
.LowLevelPackedRO
.decRef(zlp
); zlp
= 0; }
683 this (in AZFile afl
) {
693 ~this () { close(); }
695 void opAssign (in AZFile afl
) {
697 auto n
= cast(ArzArchive
.LowLevelPackedRO
*)afl
.zlp
;
704 void close () { decRef(); }
706 @property bool isOpen () const pure nothrow @safe @nogc { pragma(inline
, true); return (zlp
!= 0); }
707 @property uint size () const pure nothrow @safe @nogc { pragma(inline
, true); return (zlp ? zl
.totalsize
: 0); }
708 @property uint tell () const pure nothrow @safe @nogc { pragma(inline
, true); return (zlp ? zl
.pos
: 0); }
/// Moves the read position; `origin` is one of `SEEK_SET`, `SEEK_CUR`, `SEEK_END`.
/// Throws: ArczException if the file is closed or the low-level seek fails.
void seek (long ofs, int origin=SEEK_SET) {
  if (!zlp) throw new ArczException("can't seek in closed file");
  if (zl.lseek(ofs, origin) < 0) throw new ArczException("seek error");
}
716 private import std
.traits
: isMutable
;
/** Reads whole elements of `T` into `buf`.
 *
 * Returns: the slice of `buf` that was actually filled; it is shorter than
 * `buf` at end of file, and also when `buf` is larger than a single
 * low-level read can report (`int.max` bytes).
 *
 * Throws: ArczException if the file is closed, the read fails, or the data
 * ends in the middle of an element.
 */
T[] rawRead(T) (T[] buf) if (isMutable!T) {
  if (!zlp) throw new ArczException("can't read from closed file");
  if (buf.length == 0) return buf[0..0];
  // `read()` takes a `uint` byte count and answers with an `int`, so ask for
  // no more whole elements than fit into `int.max` bytes
  enum size_t maxElems = int.max/T.sizeof;
  immutable size_t want = (buf.length > maxElems ? maxElems : buf.length);
  immutable int res = zl.read(buf.ptr, cast(uint)(want*T.sizeof));
  if (res < 0 || res%T.sizeof != 0) throw new ArczException("read error");
  return buf[0..res/T.sizeof];
}
732 // ////////////////////////////////////////////////////////////////////////// //
733 /** this class can be used to create archive file.
736 * --------------------
737 * import std.file, std.path, std.stdio : File;
739 * enum ArcName = "z00.arz";
740 * enum DirName = "experimental-docs";
743 * rdbuf.length = 65536;
745 * auto arcz = new ArzCreator(ArcName);
747 * foreach (DirEntry e; dirEntries(DirName, SpanMode.breadth)) {
749 * assert(e.size < uint.max);
752 * string fname = e.name[DirName.length+1..$];
753 * arcz.newFile(fname, cast(uint)e.size);
754 * auto fi = File(e.name);
756 * auto rd = fi.rawRead(rdbuf[]);
757 * if (rd.length == 0) break;
758 * arcz.rawWrite(rd[]);
763 * writeln(total, " bytes packed to ", getSize(ArcName), " (", arcz.chunksWritten, " chunks, ", arcz.filesWritten, " files)");
764 * --------------------
766 final class ArzCreator
{
767 private import etc
.c
.zlib
;
768 private import core
.stdc
.stdio
: FILE
, fopen
, fclose
, ftell
, fseek
, fwrite
;
771 //WARNING! don't change the order!
775 BalzMax
, // Balz, maximum compression
776 Zopfli
, // this will fallback to zlib if no zopfli support was compiled in
781 static struct ChunkInfo
{
782 uint ofs
; // offset in file
783 uint pksize
; // packed chunk size
786 static struct FileInfo
{
789 uint chunkofs
; // offset of first file byte in unpacked chunk
790 uint size
; // unpacked file size
800 uint statChunks
, statFiles
;
801 Compressor cpr
= Compressor
.ZLib
;
/// Writes `v` to the archive file as 4 bytes; the value is byte-swapped first
/// on big-endian hosts, so the file always gets little-endian order.
/// Throws: ArczException if the file is not open or the write comes up short.
void writeUint (uint v) {
  if (arcfl is null) throw new ArczException("write error");
  version(LittleEndian) {
    // host byte order is already the file byte order
  } else version(BigEndian) {
    import core.bitop : bswap;
    v = bswap(v);
  } else {
    static assert(0, "wtf?!");
  }
  immutable written = fwrite(&v, 1, v.sizeof, arcfl);
  if (written != v.sizeof) throw new ArczException("write error");
}
/// Writes a single byte to the archive file.
/// Throws: ArczException if the file is not open or the byte was not written.
void writeUbyte (ubyte v) {
  if (arcfl is null) throw new ArczException("write error");
  immutable written = fwrite(&v, 1, v.sizeof, arcfl);
  if (written != v.sizeof) throw new ArczException("write error");
}
/// Writes all of `buf` to the archive file.
/// An empty `buf` is a no-op (the file handle is not even checked then).
/// Throws: ArczException if the file is not open or the write comes up short.
void writeBuf (const(void)[] buf) {
  if (buf.length == 0) return;
  if (arcfl is null) throw new ArczException("write error");
  immutable written = fwrite(buf.ptr, 1, buf.length, arcfl);
  if (written != buf.length) throw new ArczException("write error");
}
829 static if (arcz_has_balz
) long writePackedBalz (const(void)[] upbuf
) {
830 assert(upbuf
.length
> 0 && upbuf
.length
< int.max
);
834 bz
.reinit(ArzArchive
.balzDictSize(cast(uint)upbuf
.length
));
838 import core
.stdc
.string
: memcpy
;
839 if (ipos
>= upbuf
.length
) return 0;
840 uint rd
= cast(uint)upbuf
.length
-ipos
;
841 if (rd
> buf
.length
) rd
= cast(uint)buf
.length
;
842 memcpy(buf
.ptr
, upbuf
.ptr
+ipos
, rd
);
852 (cpr
== Compressor
.BalzMax
)
857 static if (arcz_has_zopfli
) long writePackedZopfli (const(void)[] upbuf
) {
862 ZopfliCompress(opts
, ZOPFLI_FORMAT_ZLIB
, upbuf
.ptr
, upbuf
.length
, &odata
, &osize
);
863 writeBuf(odata
[0..osize
]);
865 return cast(long)osize
;
868 static if (arcz_has_dlzma
) long writePackedLzma (const(void)[] upbuf
) {
869 import core
.stdc
.stdlib
: malloc
, free
;
870 assert(upbuf
.length
> 0 && upbuf
.length
< int.max
);
872 CLzmaEncHandle lzenc
= LzmaEnc_Create(&lzmaDefAllocator
);
873 if (lzenc
is null) throw new ArczException("cannot allocate LZMA encoder");
874 scope(exit
) LzmaEnc_Destroy(lzenc
, &lzmaDefAllocator
, &lzmaDefAllocator
);
877 LzmaEncProps_Init(&props
);
879 //props.dictSize = 1;
880 //while (props.dictSize < insize) props.dictSize <<= 1;
881 //props.dictSize = 1<<27; //128MB
882 props
.dictSize
= 1<<22; //4MB
883 props
.reduceSize
= upbuf
.length
;
885 if (LzmaEnc_SetProps(lzenc
, &props
) != SZ_OK
) throw new ArczException("cannot init LZMA encoder");
886 LzmaEnc_SetDataSize(lzenc
, upbuf
.length
);
891 ISeqInStream inStream
;
892 inStream
.Read
= delegate SRes (ISeqInStream
* p
, void* buf
, usize
* size
) nothrow {
893 import core
.stdc
.string
: memcpy
;
894 usize inleft
= upbuf
.length
-inpos
;
895 if (inleft
> *size
) inleft
= *size
;
896 if (inleft
) memcpy(buf
, (cast(const(ubyte)*)upbuf
)+inpos
, inleft
);
902 ISeqOutStream outStream
;
903 outStream
.Write
= delegate usize (ISeqOutStream
* p
, const(void)* buf
, usize size
) nothrow {
905 writeBuf((cast(const(ubyte)*)buf
)[0..size
]);
906 } catch (Exception e
) {
913 ubyte[LZMA_PROPS_SIZE
+12] propsEnc
;
914 uint propsEncSize
= cast(uint)propsEnc
.length
;
916 if (LzmaEnc_WriteProperties(lzenc
, propsEnc
.ptr
, &propsEncSize
) != SZ_OK
) throw new ArczException("cannot encode LZMA properties");
917 if (propsEncSize
!= LZMA_PROPS_SIZE
) throw new ArczException("invalid LZMA properties size");
918 writeBuf(propsEnc
[0..propsEncSize
]);
919 outsize
= propsEncSize
;
921 immutable eres
= LzmaEnc_Encode(lzenc
, &outStream
, &inStream
, null, &lzmaDefAllocator
, &lzmaDefAllocator
);
922 if (eres
!= SZ_OK
) throw new ArczException("error in LZMA packer");
924 return cast(long)outsize
;
/** Compresses `upbuf` with zlib (best compression) and writes the result to
 * the archive file.
 *
 * Returns: number of packed bytes written.
 * Throws: ArczException on zlib or write errors.
 */
long writePackedZLib (const(void)[] upbuf) {
  assert(upbuf.length > 0 && upbuf.length < int.max);
  z_stream zs;
  ubyte[2048] obuf;
  long res = 0;

  // writes out whatever is in `obuf` and makes the whole buffer available again
  void drain () {
    immutable uint used = cast(uint)obuf.length-zs.avail_out;
    if (used) {
      writeBuf(obuf[0..used]);
      res += used;
    }
    zs.next_out = obuf.ptr;
    zs.avail_out = cast(uint)obuf.length;
  }

  if (deflateInit2(&zs, Z_BEST_COMPRESSION, Z_DEFLATED, 15, 9, 0) != Z_OK) throw new ArczException("can't write packed data");
  scope(exit) deflateEnd(&zs);
  zs.next_in = cast(typeof(zs.next_in))upbuf.ptr;
  zs.avail_in = cast(uint)upbuf.length;
  zs.next_out = obuf.ptr;
  zs.avail_out = cast(uint)obuf.length;
  // feed all the input
  while (zs.avail_in > 0) {
    if (zs.avail_out == 0) drain();
    if (deflate(&zs, Z_NO_FLUSH) != Z_OK) throw new ArczException("zlib compression error");
  }
  // Finish the stream. `deflate(Z_FINISH)` has to be called until it reports
  // `Z_STREAM_END`, even if the output buffer is empty at this point; the old
  // loop was skipped in that case and left the stream unterminated.
  for (;;) {
    if (zs.avail_out == 0) drain();
    immutable err = deflate(&zs, Z_FINISH);
    if (err == Z_STREAM_END) break;
    if (err != Z_OK) throw new ArczException("zlib compression error");
  }
  drain();
  return res;
}
/** Packs `upbuf` with the compressor selected in `cpr` and writes it to the
 * archive file.
 *
 * Zopfli falls back to zlib when no Zopfli support was compiled in.
 *
 * Returns: size of the packed data written.
 * Throws: ArczException if Balz or LZMA is selected but not compiled in, or
 * if the packed size does not fit into 32 bits.
 */
uint writePackedBuf (const(void)[] upbuf) {
  assert(upbuf.length > 0 && upbuf.length < int.max);
  long res;
  final switch (cpr) {
    case Compressor.ZLib:
      res = writePackedZLib(upbuf);
      break;
    case Compressor.Balz:
    case Compressor.BalzMax:
      static if (arcz_has_balz) {
        res = writePackedBalz(upbuf);
        break;
      } else {
        // this exception used to be created but never thrown
        throw new ArczException("no Balz support was compiled in ArcZ");
      }
    case Compressor.Zopfli:
      static if (arcz_has_zopfli) {
        res = writePackedZopfli(upbuf);
      } else {
        res = writePackedZLib(upbuf);
      }
      break;
    case Compressor.Lzma:
      static if (arcz_has_dlzma) {
        res = writePackedLzma(upbuf);
        break;
      } else {
        // this exception used to be created but never thrown
        throw new ArczException("no LZMA support was compiled in ArcZ");
      }
  }
  if (res > uint.max) throw new ArczException("output archive too big");
  return cast(uint)res;
}
1004 auto pos
= ftell(arcfl
);
1005 if (pos
< 0 || pos
>= uint.max
) throw new ArczException("output archive too big");
1006 ci
.ofs
= cast(uint)pos
;
1007 auto wlen
= writePackedBuf(chunkdata
[0..cdpos
]);
1009 if (cdpos
== chunkdata
.length
&& ci
.pksize
>= chunkdata
.length
) {
1010 // wow, this chunk is unpackable
1011 //{ import std.stdio; writeln("unpackable chunk found!"); }
1012 if (fseek(arcfl
, pos
, 0) < 0) throw new ArczException("can't seek in output file");
1013 writeBuf(chunkdata
[0..cdpos
]);
1015 import core
.stdc
.stdio
: fileno
;
1016 import core
.sys
.posix
.unistd
: ftruncate
;
1018 if (pos
< 0 || pos
>= uint.max
) throw new ArczException("output archive too big");
1019 if (ftruncate(fileno(arcfl
), cast(uint)pos
) < 0) throw new ArczException("error truncating output file");
1023 if (cdpos
< chunkdata
.length
) lastChunkSize
= cast(uint)cdpos
;
1027 lastChunkSize
= cast(uint)chunkdata
.length
;
1034 //assert(ftell(arcfl) > 0 && ftell(arcfl) < uint.max);
1035 assert(chunkdata
.length
< uint.max
);
1036 assert(chunks
.length
< uint.max
);
1037 assert(files
.length
< uint.max
);
1038 // create index in memory
1041 void putUint (uint v
) {
1043 index
~= (v
>>8)&0xff;
1044 index
~= (v
>>16)&0xff;
1045 index
~= (v
>>24)&0xff;
1048 void putUbyte (ubyte v
) {
1052 void putBuf (const(void)[] buf
) {
1053 assert(buf
.length
> 0);
1054 index
~= (cast(const(ubyte)[])buf
)[];
1057 // create index in memory
1060 putUint(cast(uint)chunkdata
.length
);
1062 putUint(cast(uint)chunks
.length
);
1064 putUint(lastChunkSize
); // 0: last chunk is full
1065 // chunk offsets and sizes
1066 foreach (ref ci
; chunks
) {
1071 putUint(cast(uint)files
.length
);
1072 uint nbofs
= cast(uint)index
.length
+cast(uint)files
.length
*(5*4);
1075 foreach (ref fi
; files
) {
1076 // name: length(byte), chars
1077 assert(fi
.name
.length
> 0 && fi
.name
.length
<= 16384);
1079 putUint(cast(uint)fi
.name
.length
);
1080 nbofs
+= cast(uint)fi
.name
.length
+1; // put zero byte there to ease C interfacing
1081 //putBuf(fi.name[]);
1084 // offset in unpacked chunk
1085 putUint(fi
.chunkofs
);
1090 foreach (ref fi
; files
) {
1092 putUbyte(0); // this means nothing, it is here just for convenience (hello, C!)
1094 assert(index
.length
< uint.max
);
1096 auto cpos
= ftell(arcfl
);
1097 if (cpos
< 0 || cpos
> uint.max
) throw new ArczException("output archive too big");
1098 // write packed index
// the debug output imports `printf` (the import used to name a non-existing `pinrtf`)
debug(arcz_writer) { import core.stdc.stdio : printf; printf("index size: %u\n", cast(uint)index.length); }
auto pkisz = writePackedBuf(index[]);
debug(arcz_writer) { import core.stdc.stdio : printf; printf("packed index size: %u\n", cast(uint)pkisz); }
1103 if (fseek(arcfl
, 5, 0) < 0) throw new ArczException("seek error");
1104 // index offset in file
1106 // packed index size
1108 // unpacked index size
1109 writeUint(cast(uint)index
.length
);
1111 statChunks
= cast(uint)chunks
.length
;
1112 statFiles
= cast(uint)files
.length
;
/** Creates archive file `fname` and writes a placeholder header.
 *
 * Params:
 *   fname = name of the output file (created with `fopen(..., "wb")`)
 *   chunkSize = unpacked chunk size in bytes
 *   acpr = compressor to use
 *
 * The three index fields of the header are written as zeroes here.
 *
 * Throws: ArczException if the requested compressor (Balz or LZMA) was not
 * compiled in, or if the file cannot be created or written.
 */
this (const(char)[] fname, uint chunkSize=256*1024, Compressor acpr=Compressor.ZLib) {
  import std.internal.cstring;
  assert(chunkSize > 0 && chunkSize < 32*1024*1024); // arbitrary limit
  static if (!arcz_has_balz) {
    if (acpr == Compressor.Balz || acpr == Compressor.BalzMax) throw new ArczException("no Balz support was compiled in ArcZ");
  }
  static if (!arcz_has_dlzma) {
    // same early refusal as for Balz, instead of failing on the first chunk
    if (acpr == Compressor.Lzma) throw new ArczException("no LZMA support was compiled in ArcZ");
  }
  // no check for Zopfli: packing falls back to zlib without it
  cpr = acpr;
  arcfl = fopen(fname.tempCString, "wb");
  if (arcfl is null) throw new ArczException("can't create output file '"~fname.idup~"'");
  // registered before anything else can fail, so the file is never leaked
  scope(failure) { fclose(arcfl); arcfl = null; }
  chunkdata.length = chunkSize;
  writeBuf("CZA2"); // signature
  // format version byte: 0: zlib (also used for Zopfli); 1: balz; 2: lzma
  ubyte fmtver = 0;
  if (cpr == Compressor.Balz || cpr == Compressor.BalzMax) {
    fmtver = 1;
  } else if (cpr == Compressor.Lzma) {
    fmtver = 2;
  }
  writeUbyte(fmtver); // compressor
  writeUint(0); // offset to index
  writeUint(0); // packed index size
  writeUint(0); // unpacked index size
}
1142 ~this () { close(); }
1145 if (arcfl
!is null) {
1146 scope(exit
) { fclose(arcfl
); arcfl
= null; }
1156 // valid after closing
1157 @property uint chunksWritten () const pure nothrow @safe @nogc { pragma(inline
, true); return statChunks
; }
1158 @property uint filesWritten () const pure nothrow @safe @nogc { pragma(inline
, true); return statFiles
; }
1160 void newFile (string name
, uint size
) {
1162 assert(name
.length
<= 255);
1164 fi
.chunk
= cast(uint)chunks
.length
;
1165 fi
.chunkofs
= cast(uint)cdpos
;
/// Appends the raw bytes of `buffer` to the current chunk buffer; a full
/// chunk buffer is passed to `flushData()` before more bytes are stored.
void rawWrite(T) (const(T)[] buffer) {
  if (buffer.length == 0) return;
  auto src = cast(const(ubyte)*)buffer.ptr;
  auto left = buffer.length*T.sizeof;
  while (left > 0) {
    if (cdpos == chunkdata.length) flushData();
    if (cdpos < chunkdata.length) {
      // store as much as still fits into the chunk buffer
      immutable room = chunkdata.length-cdpos;
      immutable take = (left < room ? left : room);
      chunkdata[cdpos..cdpos+take] = src[0..take];
      cdpos += take;
      src += take;
      left -= take;
    }
  }
}
1190 // ////////////////////////////////////////////////////////////////////////// //
1191 /* arcz file format:
1194 db 'CZA2' ; signature
1195 db version ; 0: zlib; 1: balz; 2: lzma
1196 dd indexofs ; offset to packed index
1197 dd pkindexsz ; size of packed index
1198 dd upindexsz ; size of unpacked index
1203 dd chunksize ; unpacked chunk size in bytes
1204 dd chunkcount ; number of chunks in file
  dd lastchunksz ; size of last chunk (it may be incomplete); equals `chunksize` when the last chunk is completely used (never 0: the reader rejects such archives)
1207 then chunk offsets and sizes follows:
1208 dd chunkofs ; from file start
1209 dd pkchunksz ; size of (possibly packed) chunk data; if it equals to `chunksize`, this chunk is not packed
1211 then file list follows:
1212 dd filecount ; number of files in archive
1214 then file info follows:
1215 dd nameofs ; (in index)
1216 dd namelen ; length of name (can't be 0)
1217 dd firstchunk ; chunk where file starts
1218 dd firstofs ; offset in first chunk (unpacked) where file starts
1219 dd filesize ; unpacked file size
1221 then name buffer follows -- just bytes