1 /***********************************************************************
2 * This file is part of HA, a general purpose file archiver.
3 * Copyright (C) 1995 Harri Hirvola
4 * Modified by Ketmar // Invisible Vector
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 ***********************************************************************/
26 #include "libhapack.h"
29 /******************************************************************************/
31 /******************************************************************************/
33 #define POSCODES (31200) /* also, dictionary buffer len for SWD */
38 #define LLMASK (LLLEN-1)
39 #define LENCODES (SLCODES+LLCODES*LLLEN)
40 #define LTCODES (SLCODES+LLCODES)
44 #define MAXLT (750*LTSTEP)
46 #define MAXCT (1000*CTSTEP)
48 #define MAXPT (250*PTSTEP)
50 #define MAXTT (150*TTSTEP)
52 #define TTOMASK (TTORD-1)
53 #define LCUTOFF (3*LTSTEP)
54 #define CCUTOFF (3*CTSTEP)
57 #define MINLENLIM (4096)
60 /* Minimum possible match lenght for this implementation */
64 #define HASH(p) ((swd->b[p]^((swd->b[p+1]^(swd->b[p+2]<<HSHIFT))<<HSHIFT))&(HSIZE-1))
67 #define MAXFLEN (LENCODES+MINLEN-1) /* max len to be found */
68 #define MAXDLEN (POSCODES+MAXFLEN) /* reserved bytes for dict; POSCODES+2*MAXFLEN-1<32768 !!! */
71 /******************************************************************************/
72 static int dummy_bread (void *buf
, int buf_len
, void *udata
) { return -1; }
73 static int dummy_bwrite (const void *buf
, int buf_len
, void *udata
) { return -1; }
76 static const libha_io_t dummy_iot
= {
78 .bwrite
= dummy_bwrite
,
99 /******************************************************************************/
100 static inline int get_byte (io_t
*io
) {
101 if (io
->bufin_pos
>= io
->bufin_max
) {
102 if (io
->bufin_pos
< io
->bufin_size
+1) {
104 io
->bufin_max
= io
->io
->bread(io
->bufin
, io
->bufin_size
, io
->udata
);
105 if (io
->bufin_max
< 0) longjmp(io
->errJP
, LIBHA_ERR_READ
);
106 if (io
->bufin_max
== 0) { io
->bufin_pos
= io
->bufin_size
+42; return -1; } /* EOF */
111 return io
->bufin
[io
->bufin_pos
++];
115 static inline void put_byte (io_t
*io
, int c
) {
116 if (io
->bufout_pos
>= io
->bufout_size
) {
117 int res
= io
->io
->bwrite(io
->bufout
, io
->bufout_pos
, io
->udata
);
118 if (res
!= io
->bufout_pos
) longjmp(io
->errJP
, LIBHA_ERR_WRITE
);
121 io
->bufout
[io
->bufout_pos
++] = c
&0xff;
125 static inline void flush (io_t
*io
) {
126 if (io
->bufout_pos
> 0) {
127 int res
= io
->io
->bwrite(io
->bufout
, io
->bufout_pos
, io
->udata
);
128 if (res
!= io
->bufout_pos
) longjmp(io
->errJP
, LIBHA_ERR_WRITE
);
134 /******************************************************************************/
136 uint16_t swd_bpos
, swd_mlf
;
138 uint16_t cblen
, binb
;
139 uint16_t bbf
, bbl
, inptr
;
140 uint16_t ccnt
[HSIZE
], ll
[MAXDLEN
], cr
[HSIZE
], best
[MAXDLEN
];
141 uint8_t b
[MAXDLEN
+MAXFLEN
]; // was:-1
142 uint16_t blen
, iblen
;
147 static void swd_init (swd_t
*swd
) {
148 memset(swd
->ccnt
, 0, sizeof(swd
->ccnt
));
149 memset(swd
->ll
, 0, sizeof(swd
->ll
));
150 memset(swd
->cr
, 0, sizeof(swd
->cr
));
151 memset(swd
->best
, 0, sizeof(swd
->best
));
152 memset(swd
->b
, 0, sizeof(swd
->b
));
153 swd
->binb
= swd
->bbf
= swd
->bbl
= swd
->inptr
= 0;
154 swd
->swd_mlf
= MINLEN
-1;
158 /* return -1 on EOF or 0 */
159 static int swd_first_bytes (swd_t
*swd
) {
160 while (swd
->bbl
< MAXFLEN
) {
161 int b
= get_byte(swd
->io
);
162 if (b
< 0) return -1;
163 swd
->b
[swd
->inptr
++] = b
;
170 static void swd_accept (swd_t
*swd
) {
171 int16_t j
= swd
->swd_mlf
-2;
172 /* relies on non changed swd->swd_mlf !!! */
175 if (swd
->binb
== POSCODES
) --swd
->ccnt
[HASH(swd
->inptr
)]; else ++swd
->binb
;
177 swd
->ll
[swd
->bbf
] = swd
->cr
[i
];
178 swd
->cr
[i
] = swd
->bbf
;
179 swd
->best
[swd
->bbf
] = 30000;
181 if (++swd
->bbf
== MAXDLEN
) swd
->bbf
= 0;
182 if ((i
= get_byte(swd
->io
)) < 0) {
184 if (++swd
->inptr
== MAXDLEN
) swd
->inptr
= 0;
187 if (swd
->inptr
< MAXFLEN
-1) {
188 swd
->b
[swd
->inptr
+MAXDLEN
] = swd
->b
[swd
->inptr
] = i
;
191 swd
->b
[swd
->inptr
] = i
;
192 if (++swd
->inptr
== MAXDLEN
) swd
->inptr
= 0;
195 swd
->swd_mlf
= MINLEN
-1;
199 static void swd_findbest (swd_t
*swd
) {
200 uint16_t ref
, ptr
, start_len
;
202 uint16_t i
= HASH(swd
->bbf
);
203 uint16_t cnt
= swd
->ccnt
[i
];
205 if (cnt
> MAXCNT
) cnt
= MAXCNT
;
206 ptr
= swd
->ll
[swd
->bbf
] = swd
->cr
[i
];
207 swd
->cr
[i
] = swd
->bbf
;
208 swd
->swd_char
= swd
->b
[swd
->bbf
];
209 if ((start_len
= swd
->swd_mlf
) >= swd
->bbl
) {
210 if (swd
->bbl
== 0) swd
->swd_char
= -1;
211 swd
->best
[swd
->bbf
] = 30000;
213 for (ref
= swd
->b
[swd
->bbf
+swd
->swd_mlf
-1]; cnt
--; ptr
= swd
->ll
[ptr
]) {
214 if (swd
->b
[ptr
+swd
->swd_mlf
-1] == ref
&& swd
->b
[ptr
] == swd
->b
[swd
->bbf
] && swd
->b
[ptr
+1] == swd
->b
[swd
->bbf
+1]) {
215 unsigned char *p1
= swd
->b
+ptr
+3, *p2
= swd
->b
+swd
->bbf
+3;
216 for (i
= 3; i
< swd
->bbl
; ++i
) if (*p1
++ != *p2
++) break;
217 if (i
<= swd
->swd_mlf
) continue;
219 if ((swd
->swd_mlf
= i
) == swd
->bbl
|| swd
->best
[ptr
] < i
) break;
220 ref
= swd
->b
[swd
->bbf
+swd
->swd_mlf
-1];
223 swd
->best
[swd
->bbf
] = swd
->swd_mlf
;
224 if (swd
->swd_mlf
> start_len
) {
225 if (swd
->swd_bpos
< swd
->bbf
) {
226 swd
->swd_bpos
= swd
->bbf
-swd
->swd_bpos
-1;
228 swd
->swd_bpos
= MAXDLEN
-1-swd
->swd_bpos
+swd
->bbf
;
232 if (swd
->binb
== POSCODES
) --swd
->ccnt
[HASH(swd
->inptr
)]; else ++swd
->binb
;
233 if (++swd
->bbf
== MAXDLEN
) swd
->bbf
= 0;
234 if ((ch
= get_byte(swd
->io
)) < 0) {
236 if (++swd
->inptr
== MAXDLEN
) swd
->inptr
= 0;
239 if (swd
->inptr
< MAXFLEN
-1) {
240 swd
->b
[swd
->inptr
+MAXDLEN
] = swd
->b
[swd
->inptr
] = ch
;
243 swd
->b
[swd
->inptr
] = ch
;
244 if (++swd
->inptr
== MAXDLEN
) swd
->inptr
= 0;
249 /******************************************************************************/
258 /***********************************************************************
260 ***********************************************************************/
261 #define putbit(b) do { \
263 if (b) ari->ppat |= 1; \
264 if (ari->ppat&0x100) { \
265 put_byte(ari->io, ari->ppat&0xff); \
271 /***********************************************************************
273 ***********************************************************************/
274 static void ac_out (ari_t
*ari
, uint16_t low
, uint16_t high
, uint16_t tot
) {
276 if (tot
== 0) longjmp(ari
->io
->errJP
, LIBHA_ERR_BAD_DATA
);
277 r
= (uint32_t)(ari
->h
-ari
->l
)+1;
278 ari
->h
= (uint16_t)(r
*high
/tot
-1)+ari
->l
;
279 ari
->l
+= (uint16_t)(r
*low
/tot
);
280 if (!((ari
->h
^ari
->l
)&0x8000)) {
281 putbit(ari
->l
&0x8000);
284 putbit(~ari
->l
&0x8000);
289 while (!((ari
->h
^ari
->l
)&0x8000)) {
290 putbit(ari
->l
&0x8000);
296 while ((ari
->l
&0x4000) && !(ari
->h
&0x4000)) {
306 static void ac_init_encode (ari_t
*ari
) {
313 static void ac_end_encode (ari_t
*ari
) {
315 putbit(ari
->l
&0x4000);
317 putbit(~ari
->l
&0x4000);
319 if (ari
->ppat
== 1) {
323 while (!(ari
->ppat
&0x100)) ari
->ppat
<<= 1;
324 put_byte(ari
->io
, ari
->ppat
&0xff);
329 /******************************************************************************/
330 #define BUFIN_SIZE (64*1024)
331 #define BUFOUT_SIZE (64*1024)
334 uint16_t ltab
[2*LTCODES
];
335 uint16_t eltab
[2*LTCODES
];
336 uint16_t ptab
[2*PTCODES
];
337 uint16_t ctab
[2*CTCODES
];
338 uint16_t ectab
[2*CTCODES
];
339 uint16_t ttab
[TTORD
][2];
340 uint16_t accnt
, pmax
, npt
;
348 uint8_t bufin
[BUFIN_SIZE
];
349 uint8_t bufout
[BUFOUT_SIZE
];
354 static void setup_buffers (libha_t asc
) {
355 asc
->io
.bufin
= asc
->bufin
;
356 asc
->io
.bufin_size
= sizeof(asc
->bufin
);
357 asc
->io
.bufin_pos
= 0;
358 asc
->io
.bufin_max
= 0;
359 asc
->io
.bufout
= asc
->bufout
;
360 asc
->io
.bufout_size
= sizeof(asc
->bufout
);
361 asc
->io
.bufout_pos
= 0;
365 libha_t
libha_alloc (const libha_io_t
*iot
, void *udata
) {
366 libha_t res
= calloc(1, sizeof(struct libha_s
));
368 res
->iot
= (iot
? *iot
: dummy_iot
);
375 void libha_free (libha_t asc
) {
376 if (asc
!= NULL
) free(asc
);
380 const libha_io_t
*libha_get_iot (libha_t asc
) {
381 return (asc
!= NULL
? &asc
->iot
: NULL
);
385 int libha_set_iot (libha_t asc
, const libha_io_t
*iot
) {
387 asc
->iot
= (iot
? *iot
: dummy_iot
);
394 void *libha_get_udata (libha_t asc
) {
395 return (asc
!= NULL
? asc
->udata
: NULL
);
399 int libha_set_udata (libha_t asc
, void *udata
) {
408 static inline void tabinit (uint16_t t
[], uint16_t tl
, uint16_t ival
) {
410 for (i
= tl
; i
< 2*tl
; ++i
) t
[i
] = ival
;
411 for (i
= tl
-1, j
= (tl
<<1)-2; i
; --i
, j
-= 2) t
[i
] = t
[j
]+t
[j
+1];
415 static inline void tscale (uint16_t t
[], uint16_t tl
) {
417 for (i
= (tl
<<1)-1; i
>= tl
; --i
) if (t
[i
] > 1) t
[i
] >>= 1;
418 for (i
= tl
-1, j
= (tl
<<1)-2; i
; --i
, j
-= 2) t
[i
] = t
[j
]+t
[j
+1];
422 static inline void tupd (uint16_t t
[], uint16_t tl
, uint16_t maxt
, uint16_t step
, uint16_t p
) {
424 for (i
= p
+tl
; i
; i
>>= 1) t
[i
] += step
;
425 if (t
[1] >= maxt
) tscale(t
, tl
);
429 static inline void tzero (uint16_t t
[], uint16_t tl
, uint16_t p
) {
431 for (i
= p
+tl
, step
= t
[i
]; i
; i
>>= 1) t
[i
] -= step
;
435 static void model_init (libha_t asc
) {
441 asc
->npt
= asc
->pmax
= 1;
442 for (i
= 0; i
< TTORD
; ++i
) asc
->ttab
[i
][0] = asc
->ttab
[i
][1] = TTSTEP
;
443 tabinit(asc
->ltab
, LTCODES
, 0);
444 tabinit(asc
->eltab
, LTCODES
, 1);
445 tabinit(asc
->ctab
, CTCODES
, 0);
446 tabinit(asc
->ectab
, CTCODES
, 1);
447 tabinit(asc
->ptab
, PTCODES
, 0);
448 tupd(asc
->ptab
, PTCODES
, MAXPT
, PTSTEP
, 0);
452 static void pack_init (libha_t asc
) {
454 ac_init_encode(&asc
->ari
);
458 static inline void ttscale (libha_t asc
, uint16_t con
) {
459 asc
->ttab
[con
][0] >>= 1;
460 if (asc
->ttab
[con
][0] == 0) asc
->ttab
[con
][0] = 1;
461 asc
->ttab
[con
][1] >>= 1;
462 if (asc
->ttab
[con
][1] == 0) asc
->ttab
[con
][1] = 1;
466 static void codepair (libha_t asc
, int16_t l
, int16_t p
) {
467 uint16_t i
, j
, lt
, k
, cf
, tot
;
468 i
= asc
->ttab
[asc
->ttcon
][0]+asc
->ttab
[asc
->ttcon
][1];
469 ac_out(&asc
->ari
, asc
->ttab
[asc
->ttcon
][0], i
, i
+1); // writes
470 asc
->ttab
[asc
->ttcon
][1] += TTSTEP
;
471 if (i
>= MAXTT
) ttscale(asc
, asc
->ttcon
);
472 asc
->ttcon
= ((asc
->ttcon
<<1)|1)&TTOMASK
;
473 while (asc
->accnt
> asc
->pmax
) {
474 tupd(asc
->ptab
, PTCODES
, MAXPT
, PTSTEP
, asc
->npt
++);
477 for (i
= p
, j
= 0; i
; ++j
, i
>>= 1) ;
478 cf
= asc
->ptab
[PTCODES
+j
];
480 for (lt
= 0, i
= PTCODES
+j
; i
; i
>>= 1) {
481 if (i
&1) lt
+= asc
->ptab
[i
-1];
482 asc
->ptab
[i
] += PTSTEP
;
484 if (asc
->ptab
[1] >= MAXPT
) tscale(asc
->ptab
, PTCODES
);
485 ac_out(&asc
->ari
, lt
, lt
+cf
, tot
); // writes
487 for (i
= 0x8000U
; !(p
&i
); i
>>= 1) ;
489 if (i
!= (asc
->pmax
>>1)) {
490 ac_out(&asc
->ari
, j
, j
+1, i
); // writes
492 ac_out(&asc
->ari
, j
, j
+1, asc
->accnt
-(asc
->pmax
>>1)); // writes
496 if (i
== LENCODES
-1) {
497 i
= SLCODES
-1, j
= 0xffff;
498 } else if (i
< SLCODES
-1) {
501 j
= (i
-SLCODES
+1)&LLMASK
;
502 i
= ((i
-SLCODES
+1)>>LLBITS
)+SLCODES
;
504 if ((cf
= asc
->ltab
[LTCODES
+i
]) == 0) {
505 ac_out(&asc
->ari
, asc
->ltab
[1], asc
->ltab
[1]+asc
->les
, asc
->ltab
[1]+asc
->les
); // writes
506 for (lt
= 0, k
= LTCODES
+i
; k
; k
>>= 1) {
507 if (k
&1) lt
+= asc
->eltab
[k
-1];
508 asc
->ltab
[k
] += LTSTEP
;
510 if (asc
->ltab
[1] >= MAXLT
) tscale(asc
->ltab
, LTCODES
);
511 ac_out(&asc
->ari
, lt
, lt
+asc
->eltab
[LTCODES
+i
], asc
->eltab
[1]); // writes
512 tzero(asc
->eltab
, LTCODES
, i
);
513 if (asc
->eltab
[1] != 0) asc
->les
+= LTSTEP
; else asc
->les
= 0;
514 for (k
= i
<= LPLEN
? 0 : i
-LPLEN
; k
< (i
+LPLEN
>= LTCODES
-1 ? LTCODES
-1 : i
+LPLEN
); ++k
) {
515 if (asc
->eltab
[LTCODES
+k
]) tupd(asc
->eltab
, LTCODES
, MAXLT
, 1, k
);
518 tot
= asc
->ltab
[1]+asc
->les
;
519 for (lt
= 0, k
= LTCODES
+i
; k
; k
>>= 1) {
520 if (k
&1) lt
+= asc
->ltab
[k
-1];
521 asc
->ltab
[k
] += LTSTEP
;
523 if (asc
->ltab
[1] >= MAXLT
) tscale(asc
->ltab
, LTCODES
);
524 ac_out(&asc
->ari
, lt
, lt
+cf
, tot
); // writes
526 if (asc
->ltab
[LTCODES
+i
] == LCUTOFF
) asc
->les
-= (LTSTEP
< asc
->les
? LTSTEP
: asc
->les
-1);
527 if (j
!= 0xffff) ac_out(&asc
->ari
, j
, j
+1, LLLEN
); // writes
528 if (asc
->accnt
< POSCODES
) {
530 if (asc
->accnt
> POSCODES
) asc
->accnt
= POSCODES
;
535 static void codechar (libha_t asc
, int16_t c
) {
536 uint16_t i
, lt
, tot
, cf
;
537 i
= asc
->ttab
[asc
->ttcon
][0]+asc
->ttab
[asc
->ttcon
][1];
538 ac_out(&asc
->ari
, 0, asc
->ttab
[asc
->ttcon
][0], i
+1); // writes
539 asc
->ttab
[asc
->ttcon
][0] += TTSTEP
;
540 if (i
>= MAXTT
) ttscale(asc
, asc
->ttcon
);
541 asc
->ttcon
= (asc
->ttcon
<<1)&TTOMASK
;
542 if ((cf
= asc
->ctab
[CTCODES
+c
]) == 0) {
543 ac_out(&asc
->ari
, asc
->ctab
[1], asc
->ctab
[1]+asc
->ces
, asc
->ctab
[1]+asc
->ces
); // writes
544 for (lt
= 0, i
= CTCODES
+c
; i
; i
>>= 1) {
545 if (i
&1) lt
+= asc
->ectab
[i
-1];
546 asc
->ctab
[i
] += CTSTEP
;
548 if (asc
->ctab
[1] >= MAXCT
) tscale(asc
->ctab
, CTCODES
);
549 ac_out(&asc
->ari
, lt
, lt
+asc
->ectab
[CTCODES
+c
], asc
->ectab
[1]); // writes
550 tzero(asc
->ectab
, CTCODES
, c
);
551 if (asc
->ectab
[1] != 0) asc
->ces
+= CTSTEP
; else asc
->ces
= 0;
552 for (i
= c
<= CPLEN
? 0 : c
-CPLEN
; i
< (c
+CPLEN
>= CTCODES
-1 ? CTCODES
-1 : c
+CPLEN
); ++i
) {
553 if (asc
->ectab
[CTCODES
+i
]) tupd(asc
->ectab
, CTCODES
, MAXCT
, 1, i
);
556 tot
= asc
->ctab
[1]+asc
->ces
;
557 for (lt
= 0, i
= CTCODES
+c
; i
; i
>>= 1) {
558 if (i
&1) lt
+= asc
->ctab
[i
-1];
559 asc
->ctab
[i
] += CTSTEP
;
561 if (asc
->ctab
[1] >= MAXCT
) tscale(asc
->ctab
, CTCODES
);
562 ac_out(&asc
->ari
, lt
, lt
+cf
, tot
); // writes
564 if (asc
->ctab
[CTCODES
+c
] == CCUTOFF
) asc
->ces
-= (CTSTEP
< asc
->ces
? CTSTEP
: asc
->ces
-1);
565 if (asc
->accnt
< POSCODES
) ++asc
->accnt
;
569 int libha_pack (libha_t asc
) {
572 uint16_t omlf
, obpos
;
573 asc
->io
.io
= &asc
->iot
;
574 asc
->io
.udata
= asc
->udata
;
575 asc
->swd
.io
= asc
->ari
.io
= &asc
->io
;
577 res
= setjmp(asc
->io
.errJP
);
578 if (res
!= 0) return res
;
580 swd_first_bytes(&asc
->swd
); // reads MAXFLEN bytes
582 //swd_findbest(): reads
583 for (swd_findbest(&asc
->swd
); asc
->swd
.swd_char
>= 0; ) {
584 if (asc
->swd
.swd_mlf
> MINLEN
|| (asc
->swd
.swd_mlf
== MINLEN
&& asc
->swd
.swd_bpos
< MINLENLIM
)) {
585 omlf
= asc
->swd
.swd_mlf
;
586 obpos
= asc
->swd
.swd_bpos
;
587 oc
= asc
->swd
.swd_char
;
588 swd_findbest(&asc
->swd
); // reads
589 if (asc
->swd
.swd_mlf
> omlf
) {
592 swd_accept(&asc
->swd
); // reads
593 codepair(asc
, omlf
, obpos
);
594 swd_findbest(&asc
->swd
); // reads
597 asc
->swd
.swd_mlf
= MINLEN
-1;
598 codechar(asc
, asc
->swd
.swd_char
);
599 swd_findbest(&asc
->swd
); // reads
602 ac_out(&asc
->ari
, asc
->ttab
[asc
->ttcon
][0]+asc
->ttab
[asc
->ttcon
][1], asc
->ttab
[asc
->ttcon
][0]+asc
->ttab
[asc
->ttcon
][1]+1, asc
->ttab
[asc
->ttcon
][0]+asc
->ttab
[asc
->ttcon
][1]+1);
603 ac_end_encode(&asc
->ari
);