added COPYING and 'modified by'
[libha.git] / src / libhaunp.c
blob8dd16cee580c2d622d30ed320223ef261d41a691
1 /***********************************************************************
2 * This file is part of HA, a general purpose file archiver.
3 * Copyright (C) 1995 Harri Hirvola
4 * Modified by Ketmar // Invisible Vector
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 ***********************************************************************/
20 #include <setjmp.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
25 #include "libhaunp.h"
28 /******************************************************************************/
29 enum {
30 ERR_OUT_OF_DATA = 1,
31 ERR_READ,
32 ERR_BAD_DATA
36 /******************************************************************************/
37 #define POSCODES (31200)
38 #define SLCODES (16)
39 #define LLCODES (48)
40 #define LLLEN (16)
41 #define LLBITS (4)
42 #define LENCODES (SLCODES+LLCODES*LLLEN)
43 #define LTCODES (SLCODES+LLCODES)
44 #define CTCODES (256)
45 #define PTCODES (16)
46 #define LTSTEP (8)
47 #define MAXLT (750*LTSTEP)
48 #define CTSTEP (1)
49 #define MAXCT (1000*CTSTEP)
50 #define PTSTEP (24)
51 #define MAXPT (250*PTSTEP)
52 #define TTSTEP (40)
53 #define MAXTT (150*TTSTEP)
54 #define TTORD (4)
55 #define TTOMASK (TTORD-1)
56 #define LCUTOFF (3*LTSTEP)
57 #define CCUTOFF (3*CTSTEP)
58 #define CPLEN (8)
59 #define LPLEN (4)
62 /******************************************************************************/
63 #define RD_BUF_SIZE (65536)
64 struct haunp_s {
65 /* hup */
66 uint16_t ltab[2*LTCODES];
67 uint16_t eltab[2*LTCODES];
68 uint16_t ptab[2*PTCODES];
69 uint16_t ctab[2*CTCODES];
70 uint16_t ectab[2*CTCODES];
71 uint16_t ttab[TTORD][2];
72 uint16_t accnt, pmax, npt;
73 uint16_t ces;
74 uint16_t les;
75 uint16_t ttcon;
76 /* swd */
77 uint8_t dict[POSCODES];
78 uint16_t dict_pos;
79 uint16_t dict_pair_len;
80 uint16_t dict_pair_pos;
81 /* ari */
82 uint16_t ari_h, ari_l, ari_v;
83 int16_t ari_s;
84 int16_t ari_gpat, ari_ppat;
85 int ari_init_done;
86 /* reader */
87 haunp_bread_fn_t reader;
88 uint8_t rd_buf[RD_BUF_SIZE];
89 int rd_size;
90 int rd_pos;
91 int rd_max;
92 int no_more; /* high-level flag */
93 /* for unpack-from-mem */
94 const uint8_t *buf;
95 size_t buf_left;
96 /* other */
97 void *udata;
98 /* unpacker */
99 int done;
100 /* error exit */
101 jmp_buf errJP;
105 /******************************************************************************/
106 /* udata: haunp_t */
107 static int haunp_buf_reader (void *buf, int buf_len, void *udata) {
108 haunp_t hup = (haunp_t)udata;
109 if (hup->buf_left > 0) {
110 if (buf_len > hup->buf_left) buf_len = hup->buf_left;
111 memcpy(buf, hup->buf, buf_len);
112 hup->buf += buf_len;
113 hup->buf_left -= buf_len;
114 return buf_len;
116 return 0;
120 /******************************************************************************/
121 static inline void tabinit (uint16_t t[], uint16_t tl, uint16_t ival) {
122 uint16_t i, j;
123 for (i = tl; i < 2*tl; ++i) t[i] = ival;
124 for (i = tl-1, j = (tl<<1)-2; i; --i, j -= 2) t[i] = t[j]+t[j+1];
128 static inline void tscale (uint16_t t[], uint16_t tl) {
129 uint16_t i, j;
130 for (i = (tl<<1)-1; i >= tl; --i) if (t[i] > 1) t[i] >>= 1;
131 for (i = tl-1, j = (tl<<1)-2; i; --i, j -= 2) t[i] = t[j]+t[j+1];
135 static inline void tupd (uint16_t t[], uint16_t tl, uint16_t maxt, uint16_t step, uint16_t p) {
136 int16_t i;
137 for (i = p+tl; i; i >>= 1) t[i] += step;
138 if (t[1] >= maxt) tscale(t, tl);
142 static inline void tzero (uint16_t t[], uint16_t tl, uint16_t p) {
143 int16_t i, step;
144 for (i = p+tl, step = t[i]; i; i >>= 1) t[i] -= step;
148 static inline void ttscale (haunp_t hup, uint16_t con) {
149 hup->ttab[con][0] >>= 1;
150 if (hup->ttab[con][0] == 0) hup->ttab[con][0] = 1;
151 hup->ttab[con][1] >>= 1;
152 if (hup->ttab[con][1] == 0) hup->ttab[con][1] = 1;
156 /******************************************************************************/
157 /* return number of bytes copied (can be less thatn olen) */
158 static inline int swd_do_pair (haunp_t hup, uint8_t *obuf, int olen) {
159 int todo = (olen > hup->dict_pair_len ? hup->dict_pair_len : olen), res = todo;
160 hup->dict_pair_len -= todo;
161 while (todo-- > 0) {
162 hup->dict[hup->dict_pos] = hup->dict[hup->dict_pair_pos];
163 *obuf++ = hup->dict[hup->dict_pair_pos];
164 if (++hup->dict_pos == POSCODES) hup->dict_pos = 0;
165 if (++hup->dict_pair_pos == POSCODES) hup->dict_pair_pos = 0;
167 return res;
171 /******************************************************************************/
172 /* Arithmetic decoding */
173 /******************************************************************************/
174 /* read next byte (buffered) */
175 #define getbyte(bto) do { \
176 if (hup->rd_pos >= hup->rd_max) { \
177 hup->rd_pos = 0; \
178 hup->rd_max = hup->reader(hup->rd_buf, hup->rd_size, hup->udata); \
179 if (hup->rd_max <= 0) { \
180 hup->no_more = 1; \
181 if (hup->rd_max == 0) longjmp(hup->errJP, ERR_OUT_OF_DATA); \
182 hup->rd_max = 0; \
183 longjmp(hup->errJP, ERR_READ); \
186 bto = hup->rd_buf[hup->rd_pos++]; \
187 } while (0)
190 #define getbit(b) do { \
191 hup->ari_gpat <<= 1; \
192 if (!(hup->ari_gpat&0xff)) { \
193 getbyte(hup->ari_gpat); \
194 if (hup->ari_gpat&0x100) { \
195 hup->ari_gpat = 0x100; \
196 } else { \
197 hup->ari_gpat <<= 1; \
198 hup->ari_gpat |= 1; \
201 b |= (hup->ari_gpat&0x100)>>8; \
202 } while (0)
205 static void ac_in (haunp_t hup, uint16_t low, uint16_t high, uint16_t tot) {
206 uint32_t r;
207 if (tot == 0) longjmp(hup->errJP, ERR_BAD_DATA);
208 r = (uint32_t)(hup->ari_h-hup->ari_l)+1;
209 hup->ari_h = (uint16_t)(r*high/tot-1)+hup->ari_l;
210 hup->ari_l += (uint16_t)(r*low/tot);
211 while (!((hup->ari_h^hup->ari_l)&0x8000)) {
212 hup->ari_l <<= 1;
213 hup->ari_h <<= 1;
214 hup->ari_h |= 1;
215 hup->ari_v <<= 1;
216 getbit(hup->ari_v);
218 while ((hup->ari_l&0x4000) && !(hup->ari_h&0x4000)) {
219 hup->ari_l <<= 1;
220 hup->ari_l &= 0x7fff;
221 hup->ari_h <<= 1;
222 hup->ari_h |= 0x8001;
223 hup->ari_v <<= 1;
224 hup->ari_v ^= 0x8000;
225 getbit(hup->ari_v);
230 static inline uint16_t ac_threshold_val (haunp_t hup, uint16_t tot) {
231 uint32_t r = (uint32_t)(hup->ari_h-hup->ari_l)+1;
232 if (r == 0) longjmp(hup->errJP, ERR_BAD_DATA);
233 return (uint16_t)((((uint32_t)(hup->ari_v-hup->ari_l)+1)*tot-1)/r);
237 /******************************************************************************/
238 int haunp_reset_io (haunp_t hup, haunp_bread_fn_t reader, void *udata) {
239 if (hup != NULL) {
240 memset(hup, 0, sizeof(*hup));
241 hup->reader = reader;
242 hup->udata = udata;
243 /* init dictionary */
244 hup->dict_pos = 0;
245 /* init model */
246 hup->ces = CTSTEP;
247 hup->les = LTSTEP;
248 hup->accnt = 0;
249 hup->ttcon = 0;
250 hup->npt = hup->pmax = 1;
251 for (int i = 0; i < TTORD; ++i) hup->ttab[i][0] = hup->ttab[i][1] = TTSTEP;
252 tabinit(hup->ltab, LTCODES, 0);
253 tabinit(hup->eltab, LTCODES, 1);
254 tabinit(hup->ctab, CTCODES, 0);
255 tabinit(hup->ectab, CTCODES, 1);
256 tabinit(hup->ptab, PTCODES, 0);
257 tupd(hup->ptab, PTCODES, MAXPT, PTSTEP, 0);
258 /* init arithmetic decoder */
259 hup->ari_h = 0xffff;
260 hup->ari_l = 0;
261 hup->ari_gpat = 0;
262 hup->ari_init_done = 0; /* defer initialization */
263 /* read buffer */
264 hup->rd_size = RD_BUF_SIZE;
265 return 0;
267 return -1;
271 int haunp_reset_buf (haunp_t hup, const void *buf, int buf_len) {
272 if (hup != NULL) {
273 haunp_reset_io(hup, haunp_buf_reader, NULL);
274 hup->udata = hup;
275 hup->buf = buf;
276 hup->buf_left = (buf_len > 0 ? buf_len : 0);
277 return 0;
279 return -1;
283 haunp_t haunp_open_io (haunp_bread_fn_t reader, void *udata) {
284 haunp_t hup = calloc(1, sizeof(*hup));
285 if (hup != NULL) haunp_reset_io(hup, reader, udata);
286 return hup;
290 haunp_t haunp_open_buf (const void *buf, int buf_len) {
291 haunp_t hup = calloc(1, sizeof(*hup));
292 if (hup != NULL) haunp_reset_buf(hup, buf, buf_len);
293 return hup;
297 int haunp_close (haunp_t hup) {
298 if (hup != NULL) free(hup);
299 return 0;
303 /******************************************************************************/
304 static void asc_unpack (haunp_t hup, uint8_t *obuf, int olen) {
305 //uint16_t l, p, tv, i, lt;
306 hup->done = 0;
307 if (hup->no_more) return;
308 /* complete defered ari initialization */
309 if (!hup->ari_init_done) {
310 uint8_t b;
311 hup->ari_init_done = 1;
312 getbyte(b);
313 hup->ari_v = b<<8;
314 getbyte(b);
315 hup->ari_v |= b;
317 do_pair:
318 /* if we have some data in dictionary, copy it */
319 if (hup->dict_pair_len) {
320 int d = swd_do_pair(hup, obuf, olen);
321 hup->done += d;
322 if ((olen -= d) == 0) return;
323 obuf += d;
325 /* main unpacking loop; olen is definitely positive here */
326 do {
327 uint16_t l, p, lt;
328 uint16_t tv = ac_threshold_val(hup, hup->ttab[hup->ttcon][0]+hup->ttab[hup->ttcon][1]+1);
329 uint16_t i = hup->ttab[hup->ttcon][0]+hup->ttab[hup->ttcon][1];
330 if (hup->ttab[hup->ttcon][0] > tv) {
331 ac_in(hup, 0, hup->ttab[hup->ttcon][0], i+1);
332 hup->ttab[hup->ttcon][0] += TTSTEP;
333 if (i >= MAXTT) ttscale(hup, hup->ttcon);
334 hup->ttcon = (hup->ttcon<<1)&TTOMASK;
335 tv = ac_threshold_val(hup, hup->ctab[1]+hup->ces);
336 if (tv >= hup->ctab[1]) {
337 ac_in(hup, hup->ctab[1], hup->ctab[1]+hup->ces, hup->ctab[1]+hup->ces);
338 tv = ac_threshold_val(hup, hup->ectab[1]);
339 l = 2;
340 lt = 0;
341 for (;;) {
342 if (lt+hup->ectab[l] <= tv) { lt += hup->ectab[l]; ++l; }
343 if (l >= CTCODES) { l -= CTCODES; break; }
344 l <<= 1;
346 ac_in(hup, lt, lt+hup->ectab[CTCODES+l], hup->ectab[1]);
347 tzero(hup->ectab, CTCODES, l);
348 if (hup->ectab[1] != 0) hup->ces += CTSTEP; else hup->ces = 0;
349 for (i = (l < CPLEN ? 0 : l-CPLEN); i < (l+CPLEN >= CTCODES-1 ? CTCODES-1 : l+CPLEN); ++i) {
350 if (hup->ectab[CTCODES+i]) tupd(hup->ectab, CTCODES, MAXCT, 1, i);
352 } else {
353 l = 2;
354 lt = 0;
355 for (;;) {
356 if (lt+hup->ctab[l] <= tv) { lt += hup->ctab[l]; ++l; }
357 if (l >= CTCODES) { l -= CTCODES; break; }
358 l <<= 1;
360 ac_in(hup, lt, lt+hup->ctab[CTCODES+l], hup->ctab[1]+hup->ces);
362 tupd(hup->ctab, CTCODES, MAXCT, CTSTEP, l);
363 if (hup->ctab[CTCODES+l] == CCUTOFF) hup->ces -= (CTSTEP < hup->ces ? CTSTEP : hup->ces-1);
364 /* literal char from dictionary */
365 hup->dict[hup->dict_pos] = l;
366 if (++hup->dict_pos == POSCODES) hup->dict_pos = 0;
367 /* asc decoder */
368 if (hup->accnt < POSCODES) ++hup->accnt;
369 /* output char */
370 *obuf++ = l;
371 --olen;
372 ++hup->done;
373 } else if (i > tv) {
374 ac_in(hup, hup->ttab[hup->ttcon][0], i, i+1);
375 hup->ttab[hup->ttcon][1] += TTSTEP;
376 if (i >= MAXTT) ttscale(hup, hup->ttcon);
377 hup->ttcon = ((hup->ttcon<<1)|1)&TTOMASK;
378 while (hup->accnt > hup->pmax) {
379 tupd(hup->ptab, PTCODES, MAXPT, PTSTEP, hup->npt++);
380 hup->pmax <<= 1;
382 tv = ac_threshold_val(hup, hup->ptab[1]);
383 p = 2;
384 lt = 0;
385 for (;;) {
386 if (lt+hup->ptab[p] <= tv) { lt += hup->ptab[p]; ++p; }
387 if (p >= PTCODES) { p -= PTCODES; break; }
388 p <<= 1;
390 ac_in(hup, lt, lt+hup->ptab[PTCODES+p], hup->ptab[1]);
391 tupd(hup->ptab, PTCODES, MAXPT, PTSTEP, p);
392 if (p > 1) {
393 for (i = 1; p; i <<= 1, --p) ;
394 i >>= 1;
395 l = (i == hup->pmax>>1 ? hup->accnt-(hup->pmax>>1) : i);
396 p = ac_threshold_val(hup, l);
397 ac_in(hup, p, p+1, l);
398 p += i;
400 tv = ac_threshold_val(hup, hup->ltab[1]+hup->les);
401 if (tv >= hup->ltab[1]) {
402 ac_in(hup, hup->ltab[1], hup->ltab[1]+hup->les, hup->ltab[1]+hup->les);
403 tv = ac_threshold_val(hup, hup->eltab[1]);
404 l = 2;
405 lt = 0;
406 for (;;) {
407 if (lt+hup->eltab[l] <= tv) { lt += hup->eltab[l]; ++l; }
408 if (l >= LTCODES) { l -= LTCODES; break; }
409 l <<= 1;
411 ac_in(hup, lt, lt+hup->eltab[LTCODES+l], hup->eltab[1]);
412 tzero(hup->eltab, LTCODES, l);
413 if (hup->eltab[1] != 0) hup->les += LTSTEP; else hup->les = 0;
414 for (i = l < LPLEN ? 0 : l-LPLEN; i < (l+LPLEN >= LTCODES-1 ? LTCODES-1 : l+LPLEN); ++i) {
415 if (hup->eltab[LTCODES+i]) tupd(hup->eltab, LTCODES, MAXLT, 1, i);
417 } else {
418 l = 2;
419 lt = 0;
420 for (;;) {
421 if (lt+hup->ltab[l] <= tv) { lt += hup->ltab[l]; ++l; }
422 if (l >= LTCODES) { l -= LTCODES; break; }
423 l <<= 1;
425 ac_in(hup, lt, lt+hup->ltab[LTCODES+l], hup->ltab[1]+hup->les);
427 tupd(hup->ltab, LTCODES, MAXLT, LTSTEP, l);
428 if (hup->ltab[LTCODES+l] == LCUTOFF) hup->les -= (LTSTEP < hup->les ? LTSTEP : hup->les-1);
429 if (l == SLCODES-1) {
430 l = LENCODES-1;
431 } else if (l >= SLCODES) {
432 i = ac_threshold_val(hup, LLLEN);
433 ac_in(hup, i, i+1, LLLEN);
434 l = ((l-SLCODES)<<LLBITS)+i+SLCODES-1;
436 l += 3;
437 if (hup->accnt < POSCODES) {
438 hup->accnt += l;
439 if (hup->accnt > POSCODES) hup->accnt = POSCODES;
441 /* pair from dictionary */
442 if (hup->dict_pos > p) {
443 hup->dict_pair_pos = hup->dict_pos-1-p;
444 } else {
445 hup->dict_pair_pos = POSCODES-1-p+hup->dict_pos;
447 hup->dict_pair_len = l;
448 goto do_pair; /* recursive tail call */
449 } else {
450 /*EOF*/
451 /*ac_in(hup, i, i+1, i+1); don't need this*/
452 hup->no_more = 1;
453 break;
455 } while (olen > 0);
459 /* return number of bytes read (<len: end of data) or -1 on error */
460 int haunp_read (haunp_t hup, void *buf, int len) {
461 if (buf == NULL && len < 1) return 0;
462 if (hup != NULL) {
463 int jr = setjmp(hup->errJP);
464 if (jr == 0) {
465 hup->done = 0;
466 asc_unpack(hup, buf, len);
468 return hup->done;
470 return -1;