added licence (GPLv3, and all previous versions except one uploaded to Abandonia...
[awish.git] / src / librnc / librnc.c
blob4adfb90da917ad0f0a7570e636ec528e5129b98e
1 /*
2 * This program is free software: you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation, either version 3 of the License, or
5 * (at your option) any later version.
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
19 #include "librnc.h"
22 ////////////////////////////////////////////////////////////////////////////////
23 typedef struct {
24 unsigned int bitbuf; /* holds between 16 and 32 bits */
25 int bitcount; /* how many bits does bitbuf hold? */
26 } BitStream;
29 typedef struct {
30 int num; /* number of nodes in the tree */
31 struct {
32 unsigned int code;
33 int codelen;
34 int value;
35 } table[32];
36 } HuffmanTable;
39 ////////////////////////////////////////////////////////////////////////////////
41 * Return the big-endian longword at p.
43 static inline unsigned int blong (const unsigned char *p) {
44 unsigned int n;
46 n = p[0];
47 n = (n<<8)+p[1];
48 n = (n<<8)+p[2];
49 n = (n<<8)+p[3];
50 return n;
55 * Return the big-endian word at p.
57 static inline unsigned int bword (const unsigned char *p) {
58 unsigned int n;
60 n = p[0];
61 n = (n<<8)+p[1];
62 return n;
67 * Return the little-endian word at p.
69 static inline unsigned int lword (const unsigned char *p) {
70 unsigned int n;
72 n = p[1];
73 n = (n<<8)+p[0];
74 return n;
79 * Mirror the bottom n bits of x.
81 static inline unsigned int mirror (unsigned int x, int n) {
82 unsigned int top = 1<<(n-1), bottom = 1;
84 while (top > bottom) {
85 unsigned int mask = top|bottom;
86 unsigned int masked = x&mask;
88 if (masked != 0 && masked != mask) x ^= mask;
89 top >>= 1;
90 bottom <<= 1;
92 return x;
97 * Initialises a bit stream with the first two bytes of the packed
98 * data.
100 static inline void bitread_init (BitStream *bs, const unsigned char **p) {
101 bs->bitbuf = lword(*p);
102 bs->bitcount = 16;
107 * Fixes up a bit stream after literals have been read out of the
108 * data stream.
110 static inline void bitread_fix (BitStream *bs, const unsigned char **p) {
111 bs->bitcount -= 16;
112 bs->bitbuf &= (1<<bs->bitcount)-1; /* remove the top 16 bits */
113 bs->bitbuf |= (lword(*p)<<bs->bitcount); /* replace with what's at *p */
114 bs->bitcount += 16;
119 * Returns some bits.
121 static inline unsigned int bit_peek (BitStream *bs, unsigned int mask) {
122 return bs->bitbuf&mask;
127 * Advances the bit stream.
129 static inline void bit_advance (BitStream *bs, int n, const unsigned char **p) {
130 bs->bitbuf >>= n;
131 bs->bitcount -= n;
132 if (bs->bitcount < 16) {
133 (*p) += 2;
134 bs->bitbuf |= (lword(*p)<<bs->bitcount);
135 bs->bitcount += 16;
141 * Reads some bits in one go (ie the above two routines combined).
143 static inline unsigned int bit_read (BitStream *bs, unsigned int mask, int n, const unsigned char **p) {
144 unsigned int result = bit_peek(bs, mask);
146 bit_advance(bs, n, p);
147 return result;
152 * Read a Huffman table out of the bit stream and data stream given.
154 static int read_huftable (HuffmanTable *h, BitStream *bs, const unsigned char **p) {
155 int k, num;
156 int leaflen[32];
157 int leafmax;
158 unsigned int codeb; /* big-endian form of code */
160 num = bit_read(bs, 0x1f, 5, p);
161 if (!num) return 0;
162 leafmax = 1;
163 for (int i = 0; i < num; ++i) {
164 leaflen[i] = bit_read(bs, 0x0F, 4, p);
165 if (leafmax < leaflen[i]) leafmax = leaflen[i];
167 codeb = 0L;
168 k = 0;
169 for (int i = 1; i <= leafmax; ++i) {
170 for (int j = 0; j < num; ++j) {
171 if (leaflen[j] == i) {
172 h->table[k].code = mirror(codeb, i);
173 h->table[k].codelen = i;
174 h->table[k].value = j;
175 ++codeb;
176 ++k;
179 codeb <<= 1;
181 h->num = k;
182 return 0;
187 * Read a value out of the bit stream using the given Huffman table.
189 static unsigned int huf_read (HuffmanTable *h, BitStream *bs, const unsigned char **p) {
190 int i;
191 unsigned int val;
193 for (i = 0; i < h->num; i++) {
194 unsigned int mask = (1<<h->table[i].codelen)-1;
196 if (bit_peek(bs, mask) == h->table[i].code) break;
198 if (i == h->num) return (unsigned int)-1;
199 bit_advance(bs, h->table[i].codelen, p);
200 val = h->table[i].value;
201 if (val >= 2) {
202 val = 1<<(val-1);
203 val |= bit_read(bs, val-1, h->table[i].value-1, p);
205 return val;
209 ////////////////////////////////////////////////////////////////////////////////
211 * Decompress a packed data block. Returns the unpacked length if
212 * successful, or negative error codes if not.
214 * If COMPRESSOR is defined, it also returns the leeway number
215 * (which gets stored at offset 16 into the compressed-file header)
216 * in `*leeway', if `leeway' isn't NULL.
218 int rnc_unpack (const void *packed, void *unpacked, int *leeway) {
219 const unsigned char *input = packed;
220 const unsigned char *inputend;
221 unsigned char *output = unpacked;
222 unsigned char *outputend;
223 BitStream bs;
224 HuffmanTable raw, dist, len;
225 unsigned int ch_count;
226 unsigned int ret_len;
227 unsigned out_crc;
228 int lee = 0;
230 if (blong(input) != RNC_SIGNATURE) return RNC_FILE_IS_NOT_RNC;
231 ret_len = blong(input+4);
232 outputend = output+ret_len;
233 inputend = input+18+blong(input+8);
234 input += 18; /* skip header */
235 // check the packed-data CRC. Also save the unpacked-data CRC for later
236 if (rnc_crc(input, inputend-input) != bword(input-4)) return RNC_PACKED_CRC_ERROR;
237 out_crc = bword(input-6);
238 bitread_init(&bs, &input);
239 bit_advance(&bs, 2, &input); /* discard first two bits */
240 // process chunks
241 while (output < outputend) {
242 int this_lee;
244 read_huftable(&raw, &bs, &input);
245 read_huftable(&dist, &bs, &input);
246 read_huftable(&len, &bs, &input);
247 ch_count = bit_read(&bs, 0xffff, 16, &input);
249 for (;;) {
250 int length, posn;
252 length = huf_read(&raw, &bs, &input);
253 if (length == -1) return RNC_HUF_DECODE_ERROR;
254 if (length) {
255 while (length--) *output++ = *input++;
256 bitread_fix(&bs, &input);
258 if (--ch_count <= 0) break;
259 posn = huf_read(&dist, &bs, &input);
260 if (posn == -1) return RNC_HUF_DECODE_ERROR;
261 length = huf_read(&len, &bs, &input);
262 if (length == -1) return RNC_HUF_DECODE_ERROR;
263 posn += 1;
264 length += 2;
265 while (length--) {
266 *output = output[-posn];
267 ++output;
269 this_lee = (inputend-input)-(outputend-output);
270 if (lee < this_lee) lee = this_lee;
274 if (outputend != output) return RNC_FILE_SIZE_MISMATCH;
275 if (leeway) *leeway = lee;
276 // check the unpacked-data CRC
277 if (rnc_crc(outputend-ret_len, ret_len) != out_crc) return RNC_UNPACKED_CRC_ERROR;
278 return ret_len;
282 ////////////////////////////////////////////////////////////////////////////////
284 * Return the uncompressed length of a packed data block, or a
285 * negative error code.
287 int rnc_ulen (const void *packed) {
288 const unsigned char *p = packed;
290 if (blong(p) != RNC_SIGNATURE) return RNC_FILE_IS_NOT_RNC;
291 return blong(p+4);
295 ////////////////////////////////////////////////////////////////////////////////
297 * Calculate a CRC, the RNC way.
299 int rnc_crc (const void *data, int len) {
300 static int initialized = 0;
301 static unsigned short crctab[256];
302 unsigned short val;
303 const unsigned char *p = data;
305 if (!initialized) {
306 for (int i = 0; i < 256; ++i) {
307 val = i;
308 for (int j = 0; j < 8; ++j) if (val&1) val = (val>>1)^0xA001; else val = (val>>1);
309 crctab[i] = val;
313 val = 0;
314 while (len-- > 0) {
315 val ^= *p++;
316 val = (val>>8)^crctab[val&0xFF];
319 return val;
323 ////////////////////////////////////////////////////////////////////////////////
325 * Return an error string corresponding to an error return code.
327 const char *rnc_error (int errcode) {
328 static const char *const errors[] = {
329 "No error",
330 "File is not RNC-1 format",
331 "Huffman decode error",
332 "File size mismatch",
333 "CRC error in packed data",
334 "CRC error in unpacked data",
335 "Unknown error"
338 errcode = -errcode;
339 if (errcode < 0) errcode = 0;
340 if (errcode > sizeof(errors)/sizeof(*errors)-1) errcode = sizeof(errors)/sizeof(*errors)-1;
341 return errors[errcode];
345 ////////////////////////////////////////////////////////////////////////////////
346 // RNC packer
348 #define BLOCKMAX (8192)
349 #define WINMAX (32767)
350 #define MAXTUPLES (4096)
351 /* it's prime */
352 #define HASHMAX (509)
353 #define PACKED_DELTA (65536)
356 typedef struct {
357 struct {
358 unsigned int code;
359 int codelen;
360 } table[32];
361 } PackerHuffmanTable;
364 typedef struct {
365 int rawlen;
366 int pos;
367 int len;
368 } Tuple;
371 static unsigned char blk[WINMAX];
372 static int linkp[WINMAX];
373 static int hashp[HASHMAX];
374 static int blkstart, bpos;
375 static int blklen;
377 static Tuple tuples[MAXTUPLES];
378 static int ntuple;
380 static unsigned char *packed;
381 static int packedlen;
382 static int packedpos, bitpos, bitcount, bitbuf;
385 static inline int hash (const unsigned char *a) {
386 return ((a[0]*7+a[1])*7+a[2])%HASHMAX;
390 static inline int pklength (unsigned value) {
391 int ret = 0;
393 while (value != 0) { value >>= 1; ++ret; }
394 return ret;
399 * Write big-endian int.
401 static inline void bwrite (unsigned char *p, unsigned int val) {
402 p[3] = val;
403 val >>= 8;
404 p[2] = val;
405 val >>= 8;
406 p[1] = val;
407 val >>= 8;
408 p[0] = val;
412 static inline void emit_raw (int n) {
413 tuples[ntuple].rawlen += n;
414 bpos += n;
418 static inline void emit_pair (int pos, int len) {
419 tuples[ntuple].pos = bpos-pos;
420 tuples[ntuple].len = len;
421 tuples[++ntuple].rawlen = 0;
422 bpos += len;
426 static inline void check_size (void) {
427 if (packedpos > packedlen-16) {
428 packedlen += PACKED_DELTA;
429 packed = realloc(packed, packedlen);
430 if (packed == NULL) {
431 perror("realloc");
432 exit(1);
438 static inline void write_literal (unsigned value) {
439 packed[packedpos++] = value;
440 check_size();
444 static inline void write_bits (unsigned value, int nbits) {
445 value &= (1 << nbits)-1;
446 bitbuf |= (value << bitcount);
447 bitcount += nbits;
449 if (bitcount > 16) {
450 packed[bitpos] = bitbuf;
451 bitbuf >>= 8;
452 packed[bitpos+1] = bitbuf;
453 bitbuf >>= 8;
454 bitcount -= 16;
455 bitpos = packedpos;
456 packedpos += 2;
457 check_size();
462 static void write_hval (PackerHuffmanTable *h, unsigned value) {
463 int len = pklength(value);
465 write_bits(h->table[len].code, h->table[len].codelen);
466 if (len >= 2) write_bits(value, len-1);
470 static void write_huf (PackerHuffmanTable *h) {
471 int j = 0;
473 for (int i = 0; i < 32; ++i) if (h->table[i].codelen > 0) j = i+1;
474 write_bits(j, 5);
475 for (int i = 0; i < j; ++i) write_bits(h->table[i].codelen, 4);
479 static void build_huf (PackerHuffmanTable *h, int *freqs) {
480 struct hnode {
481 int freq, parent, lchild, rchild;
482 } pool[64];
483 int j, k, m, toobig;
484 int maxcodelen;
485 unsigned int codeb;
487 j = 0; /* j counts nodes in the pool */
488 toobig = 1;
489 for (int i = 0; i < 32; ++i) {
490 if (freqs[i]) {
491 pool[j].freq = freqs[i];
492 pool[j].parent = -1;
493 pool[j].lchild = -1;
494 pool[j].rchild = i;
495 ++j;
496 toobig += freqs[i];
499 k = j; /* k counts _free_ nodes in the pool */
500 while (k > 1) {
501 int min = toobig;
502 int nextmin = toobig+1;
503 int minpos = 0, nextminpos = 0;
505 /* loop through free nodes */
506 for (int i = 0; i < j; ++i) {
507 if (pool[i].parent == -1) {
508 if (min > pool[i].freq) {
509 nextmin = min;
510 nextminpos = minpos;
511 min = pool[i].freq;
512 minpos = i;
513 } else if (nextmin > pool[i].freq) {
514 nextmin = pool[i].freq;
515 nextminpos = i;
519 pool[j].freq = min + nextmin;
520 pool[j].parent = -1;
521 pool[j].lchild = minpos;
522 pool[j].rchild = nextminpos;
523 pool[minpos].parent = j;
524 pool[nextminpos].parent = j;
525 ++j;
526 --k;
529 for (int i = 0; i < 32; ++i) h->table[i].codelen = 0;
531 maxcodelen = 0;
532 /* loop through original nodes */
533 for (int i = 0; i < j; ++i) {
534 if (pool[i].lchild == -1) {
535 m = 0;
536 k = i;
537 while (pool[k].parent != -1) { ++m; k = pool[k].parent; }
538 h->table[pool[i].rchild].codelen = m;
539 if (maxcodelen < m) maxcodelen = m;
543 codeb = 0;
544 for (int i = 1; i <= maxcodelen; ++i) {
545 for (int j = 0; j < 32; ++j) {
546 if (h->table[j].codelen == i) {
547 h->table[j].code = mirror(codeb, i);
548 ++codeb;
551 codeb <<= 1;
556 static void write_block (void) {
557 int lengths[32];
558 PackerHuffmanTable raw, dist, len;
559 int k;
561 for (int i = 0; i < 32; ++i) lengths[i] = 0;
562 for (int i = 0; i <= ntuple; ++i) ++(lengths[pklength(tuples[i].rawlen)]);
563 build_huf(&raw, lengths);
564 write_huf(&raw);
566 for (int i = 0; i < 32; ++i) lengths[i] = 0;
567 for (int i = 0; i < ntuple; ++i) ++(lengths[pklength(tuples[i].pos-1)]);
568 build_huf(&dist, lengths);
569 write_huf(&dist);
571 for (int i = 0; i < 32; ++i) lengths[i] = 0;
572 for (int i = 0; i < ntuple; ++i) ++(lengths[pklength(tuples[i].len-2)]);
573 build_huf(&len, lengths);
574 write_huf(&len);
576 write_bits(ntuple+1, 16);
578 k = blkstart;
579 for (int i = 0; i<=ntuple; ++i) {
580 write_hval (&raw, tuples[i].rawlen);
581 for (int j = 0; j<tuples[i].rawlen; ++j) write_literal(blk[k++]);
582 if (i == ntuple) break;
583 write_hval(&dist, tuples[i].pos-1);
584 write_hval(&len, tuples[i].len-2);
585 k += tuples[i].len;
590 static void do_block (void) {
591 int lazylen = 0, lazypos = 0, lazyraw = 0;
592 int hashv, ph, h;
594 bpos = 0;
595 while (bpos < blkstart) {
596 hashv = hash(blk+bpos);
597 h = hashp[hashv];
598 ph = -1;
599 while (h != -1) {
600 ph = h;
601 h = linkp[h];
603 if (ph != -1) linkp[ph] = bpos; else hashp[hashv] = bpos;
604 linkp[bpos] = -1;
605 ++bpos;
608 while (bpos < blklen && ntuple < MAXTUPLES-1) {
609 if (blklen-bpos < 3) {
610 emit_raw(blklen-bpos);
611 } else {
612 int len, maxlen, maxlenpos;
613 int savebpos;
615 hashv = hash(blk+bpos);
616 h = hashp[hashv];
618 maxlen = 0;
619 maxlenpos = ph = -1;
621 while (h != -1) {
622 unsigned char *p = blk+bpos;
623 unsigned char *v = blk+h;
625 len = 0;
626 while (*p == *v && p < blk+blklen) { ++p; ++v; ++len; }
627 if (maxlen < len) {
628 maxlen = len;
629 maxlenpos = h;
631 ph = h;
632 h = linkp[h];
635 if (ph != -1) linkp[ph] = bpos; else hashp[hashv] = bpos;
637 linkp[bpos] = -1;
638 savebpos = bpos;
640 bpos -= lazyraw;
641 if (lazyraw) {
642 if (maxlen >= lazylen+2) {
643 emit_raw(lazyraw);
644 lazyraw = 1;
645 lazypos = maxlenpos;
646 lazylen = maxlen;
647 } else {
648 emit_pair(lazypos, lazylen);
649 lazyraw = lazypos = lazylen = 0;
651 } else if (maxlen >= 3) {
652 lazyraw = 1;
653 lazypos = maxlenpos;
654 lazylen = maxlen;
655 } else {
656 emit_raw(1);
658 bpos += lazyraw;
660 while (++savebpos < bpos) {
661 hashv = hash(blk+savebpos);
662 h = hashp[hashv];
663 ph = -1;
664 while (h != -1) {
665 ph = h;
666 h = linkp[h];
668 if (ph != -1) linkp[ph] = savebpos; else hashp[hashv] = savebpos;
669 linkp[savebpos] = -1;
674 if (lazyraw) {
675 bpos -= lazyraw;
676 emit_raw(lazyraw);
681 void *rnc_pack (const void *original, int datalen, int *packlen) {
682 const char *data = original;
683 int origlen = datalen;
685 packed = malloc(PACKED_DELTA);
686 if (packed == NULL) {
687 perror("malloc");
688 exit(1);
691 fprintf(stderr, "%3d%%", (int)(((long long)100)*(origlen-datalen)/origlen)); fflush(stderr);
693 packedlen = PACKED_DELTA;
694 packedpos = 20;
695 bwrite(packed+4, datalen);
697 bitpos = 18;
698 bitcount = 0;
699 bitbuf = 0;
700 write_bits (0, 2);
702 while (datalen > 0) {
703 blklen = datalen > BLOCKMAX ? BLOCKMAX : datalen;
704 blkstart = WINMAX-BLOCKMAX;
705 if (blkstart > origlen-datalen) blkstart = origlen-datalen;
706 memcpy (blk, data-blkstart, blkstart+blklen);
707 for (int i = 0; i < HASHMAX; ++i) hashp[i] = -1;
708 ntuple = 0;
709 tuples[ntuple].rawlen = 0;
710 blklen += blkstart;
711 do_block();
712 data += bpos-blkstart;
713 datalen -= bpos-blkstart;
714 write_block();
715 fprintf(stderr, "\x8\x8\x8\x8%3d%%", (int)(((long long)100)*(origlen-datalen)/origlen)); fflush(stderr);
718 if (bitcount > 0) {
719 write_bits(0, 17-bitcount); /* force flush */
720 packedpos -= 2; /* write_bits will have moved it on */
723 *packlen = packedpos;
725 bwrite(packed, RNC_SIGNATURE);
726 bwrite(packed+12, rnc_crc(packed+18, packedpos-18));
727 bwrite(packed+10, rnc_crc(original, origlen));
728 bwrite(packed+8, packedpos-18);
729 packed[16] = packed[17] = 0;
731 fprintf(stderr, "\x8\x8\x8\x8%3d%%", (int)(((long long)100)*(origlen-datalen)/origlen)); fflush(stderr);
733 return packed;