1 /* n2_99.ch -- implementation of the NRV2[BDE]-99 compression algorithms
3 This file is part of the UCL data compression library.
5 Copyright (C) 1996-2002 Markus Franz Xaver Johannes Oberhumer
8 The UCL library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of
11 the License, or (at your option) any later version.
13 The UCL library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with the UCL library; see the file COPYING.
20 If not, write to the Free Software Foundation, Inc.,
21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 Markus F.X.J. Oberhumer
24 <markus@oberhumer.com>
25 http://www.oberhumer.com/opensource/ucl/
30 #include <ucl/uclconf.h>
41 #if 0 && !defined(UCL_DEBUG)
47 /***********************************************************************
49 ************************************************************************/
/*
 * Compile-time parameters for the match finder and the per-variant name
 * mapping used by the rest of this file.
 * NOTE(review): the preprocessor conditionals that choose between the
 * alternative definitions below are not visible in this view -- confirm
 * which branch each definition belongs to.
 */
/* Two alternative values for N; presumably selected by a conditional. */
52 #define N (128*1024ul) /* size of ring buffer */
54 #define N (1024*1024ul) /* size of ring buffer */
/* Tested by the "#if 1 && defined(SWD_USE_MALLOC)" check in the compressor entry point. */
55 #define SWD_USE_MALLOC
/* SWD_HSIZE - 1 is used as the bit mask in the hash expressions further down. */
56 #define SWD_HSIZE 65536ul
58 #define THRESHOLD 1 /* lower limit for match length */
59 #define F 2048 /* upper limit for match length */
/* Generic names bound to the nrv2b identifiers; M2_MAX_OFFSET is 0xd00 in this set. */
62 # define UCL_COMPRESS_T ucl_nrv2b_t
63 # define ucl_swd_t ucl_nrv2b_swd_t
64 # define ucl_nrv_99_compress ucl_nrv2b_99_compress
65 # define M2_MAX_OFFSET 0xd00
/* Generic names bound to the nrv2d identifiers; M2_MAX_OFFSET is 0x500 in this set. */
67 # define UCL_COMPRESS_T ucl_nrv2d_t
68 # define ucl_swd_t ucl_nrv2d_swd_t
69 # define ucl_nrv_99_compress ucl_nrv2d_99_compress
70 # define M2_MAX_OFFSET 0x500
/* Generic names bound to the nrv2e identifiers; M2_MAX_OFFSET is 0x500 in this set. */
72 # define UCL_COMPRESS_T ucl_nrv2e_t
73 # define ucl_swd_t ucl_nrv2e_swd_t
74 # define ucl_nrv_99_compress ucl_nrv2e_99_compress
75 # define M2_MAX_OFFSET 0x500
/* Pointer-to-ucl_swd_t spelling, qualified with __UCL_MMODEL (defined elsewhere). */
79 #define ucl_swd_p ucl_swd_t * __UCL_MMODEL
/*
 * Hash over the three bytes b[p], b[p+1] and b[p+2]: each step shifts the
 * accumulated value left by 3 and XORs in the next byte, then the result is
 * reduced with the SWD_HSIZE-1 mask.
 * NOTE(review): the macro header this expression belongs to is not visible.
 */
83 ((((((ucl_uint32)b[p]<<3)^b[p+1])<<3)^b[p+2]) & (SWD_HSIZE-1))
/*
 * The leading "0 &&" makes this condition always false.  Presumably it
 * guards the alternative hash on the next line, which reads a 32-bit word
 * at &b[p] and folds it with a right shift by 10 -- confirm against the
 * full source.
 */
85 #if 0 && defined(UCL_UNALIGNED_OK_4) && (UCL_BYTE_ORDER == UCL_LITTLE_ENDIAN)
87 (((* (ucl_uint32p) &b[p]) ^ ((* (ucl_uint32p) &b[p])>>10)) & (SWD_HSIZE-1))
90 #include "ucl_mchw.ch"
93 /***********************************************************************
95 ************************************************************************/
/*
 * code_prefix_ss11 -- emit bits derived from the value i through bbPutBit().
 *
 *   c  compressor state handed to bbPutBit()
 *   i  value to be coded
 *
 * NOTE(review): only part of the body is visible in this view (the
 * declaration of the mask t and the surrounding loop are missing); confirm
 * the exact bit layout against the full source.
 */
97 static void code_prefix_ss11(UCL_COMPRESS_T *c, ucl_uint32 i)
/* emits 1 when the bit of i selected by the mask t is set, otherwise 0 */
109 bbPutBit(c, (i & t) ? 1 : 0);
/* emits the least significant bit of i */
113 bbPutBit(c, (unsigned)i & 1);
/* The following helper is only compiled when NRV2D or NRV2E is defined. */
118 #if defined(NRV2D) || defined(NRV2E)
/*
 * code_prefix_ss12 -- emit bits derived from the value i through bbPutBit().
 *
 *   c  compressor state handed to bbPutBit()
 *   i  value to be coded
 *
 * NOTE(review): the declaration of the mask t, the control flow around
 * these calls and the closing #endif are not visible in this view; confirm
 * the exact bit layout against the full source.
 */
119 static void code_prefix_ss12(UCL_COMPRESS_T *c, ucl_uint32 i)
/* two places emit the bit of i selected by the mask t (1 if set, else 0) */
130 bbPutBit(c, (i & t) ? 1 : 0);
133 bbPutBit(c, (i & t) ? 1 : 0);
/* emits the least significant bit of i */
136 bbPutBit(c, (unsigned)i & 1);
/*
 * code_match -- write one match of m_len bytes at distance m_off to the
 * output, update the statistics in c->result[] and remember the offset
 * in c->last_m_off.
 *
 *   c      compressor state
 *   m_len  length of the match
 *   m_off  offset of the match
 *
 * NOTE(review): the return type, braces and the preprocessor conditionals
 * that separate the three encoding variants below are not visible in this
 * view; the variant boundaries marked here are inferred from the helper
 * that each section calls -- confirm against the full source.
 */
143 code_match(UCL_COMPRESS_T *c, ucl_uint m_len, const ucl_uint m_off)
/*
 * A match longer than c->conf.max_match is split: pieces of
 * (max_match - 3) bytes are coded recursively until the rest fits.
 */
147 while (m_len > c->conf.max_match)
149 code_match(c, c->conf.max_match - 3, m_off);
150 m_len -= c->conf.max_match - 3;
/* statistics: total matched bytes, largest m_len in result[3], largest m_off in result[1] */
153 c->match_bytes += m_len;
154 if (m_len > c->result[3])
155 c->result[3] = m_len;
156 if (m_off > c->result[1])
157 c->result[1] = m_off;
/*
 * First variant: a match that reuses the previous offset is tested for
 * separately; otherwise (m_off - 1) is split at bit 8, the upper part
 * going through code_prefix_ss11() and the low byte through bbPutByte().
 */
162 if (m_off == c->last_m_off)
169 code_prefix_ss11(c, 1 + ((m_off - 1) >> 8));
170 bbPutByte(c, (unsigned)m_off - 1);
/* the coded length is reduced by 1, and by one more when m_off exceeds M2_MAX_OFFSET */
172 m_len = m_len - 1 - (m_off > M2_MAX_OFFSET);
177 code_prefix_ss11(c, m_len - 4);
181 bbPutBit(c, m_len > 1);
182 bbPutBit(c, (unsigned)m_len & 1);
/*
 * Second variant: (m_off - 1) is split at bit 7 and coded with
 * code_prefix_ss12(); the low 7 offset bits share a byte with a flag
 * derived from m_low, which is 0 for adjusted lengths >= 4.
 */
185 m_len = m_len - 1 - (m_off > M2_MAX_OFFSET);
187 m_low = (m_len >= 4) ? 0u : (unsigned) m_len;
188 if (m_off == c->last_m_off)
192 bbPutBit(c, m_low > 1);
193 bbPutBit(c, m_low & 1);
197 code_prefix_ss12(c, 1 + ((m_off - 1) >> 7));
198 bbPutByte(c, ((((unsigned)m_off - 1) & 0x7f) << 1) | ((m_low > 1) ? 0 : 1));
199 bbPutBit(c, m_low & 1);
202 code_prefix_ss11(c, m_len - 4);
/*
 * Third variant: same 7-bit offset split, but here m_low is a boolean
 * (adjusted length <= 2) whose inverse is stored in bit 0 of the byte.
 */
204 m_len = m_len - 1 - (m_off > M2_MAX_OFFSET);
206 m_low = (m_len <= 2);
207 if (m_off == c->last_m_off)
215 code_prefix_ss12(c, 1 + ((m_off - 1) >> 7));
216 bbPutByte(c, ((((unsigned)m_off - 1) & 0x7f) << 1) | (m_low ^ 1));
219 bbPutBit(c, (unsigned)m_len - 1);
223 bbPutBit(c, (unsigned)m_len - 3);
228 code_prefix_ss11(c, m_len - 5);
/* remembered so that the next call can compare its offset against it */
234 c->last_m_off = m_off;
/*
 * code_run -- handle a run of lit bytes starting at ii.
 *
 *   c    compressor state
 *   ii   first byte of the run
 *   lit  number of bytes in the run
 *
 * NOTE(review): almost all of the body is missing from this view; only the
 * comparison against c->result[5] is visible.  Presumably result[5] keeps
 * the longest run seen so far -- confirm against the full source.
 */
240 code_run(UCL_COMPRESS_T *c, const ucl_byte *ii, ucl_uint lit)
245 if (lit > c->result[5])
254 /***********************************************************************
256 ************************************************************************/
/*
 * len_of_coded_match -- compute a length figure for coding the match
 * (m_len, m_off).  The compressor entry point compares two such results
 * (l1 and l2) when it evaluates a lazy match.
 *
 *   c      compressor state (reads conf.max_offset and last_m_off)
 *   m_len  length of the match
 *   m_off  offset of the match
 *
 * NOTE(review): the return statements and the unit of the result
 * (presumably bits) are not visible in this view -- confirm.
 */
259 len_of_coded_match(UCL_COMPRESS_T *c, ucl_uint m_len, ucl_uint m_off)
/*
 * Same rejection test that the main loop applies to a candidate: too
 * short, length 2 with an offset above M2_MAX_OFFSET, or an offset beyond
 * the configured maximum.
 */
262 if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET))
263 || m_off > c->conf.max_offset)
/* length is reduced by 2, and by one more when m_off exceeds M2_MAX_OFFSET */
267 m_len = m_len - 2 - (m_off > M2_MAX_OFFSET);
/* a match that reuses the previous offset is treated separately */
269 if (m_off == c->last_m_off)
/* otherwise only the upper part of (m_off - 1) remains: above bit 8 here ... */
275 m_off = (m_off - 1) >> 8;
281 #elif defined(NRV2D) || defined(NRV2E)
/* ... and above bit 7 for the NRV2D / NRV2E variants */
283 m_off = (m_off - 1) >> 7;
294 #if defined(NRV2B) || defined(NRV2D)
318 /***********************************************************************
320 ************************************************************************/
/*
 * assert_match -- debug check that the m_len bytes at c->bp really equal
 * the bytes m_off positions earlier, where "earlier" may reach back into
 * the dictionary held in swd.
 *
 *   swd    match finder state (provides c, dict, dict_len, dict_end)
 *   m_len  length of the match to verify
 *   m_off  offset of the match to verify
 *
 * NOTE(review): the enclosing conditional compilation, the declaration of
 * d_off and the test that separates the last two cases are not visible in
 * this view.
 */
324 void assert_match( const ucl_swd_p swd, ucl_uint m_len, ucl_uint m_off )
326 const UCL_COMPRESS_T *c = swd->c;
/* the offset does not reach back past c->in: compare inside the input buffer */
330 if (m_off <= (ucl_uint) (c->bp - c->in))
332 assert(c->bp - m_off + m_len < c->ip);
333 assert(ucl_memcmp(c->bp, c->bp - m_off, m_len) == 0);
/* otherwise the match starts d_off bytes before the end of the dictionary */
337 assert(swd->dict != NULL);
338 d_off = m_off - (ucl_uint) (c->bp - c->in);
339 assert(d_off <= swd->dict_len);
/* first d_off bytes are compared against the dictionary tail, the rest against the start of the input */
342 assert(ucl_memcmp(c->bp, swd->dict_end - d_off, d_off) == 0);
343 assert(c->in + m_len - d_off < c->ip);
344 assert(ucl_memcmp(c->bp + d_off, c->in, m_len - d_off) == 0);
/* all m_len bytes are compared against the dictionary tail */
348 assert(ucl_memcmp(c->bp, swd->dict_end - d_off, m_len) == 0);
/* no-op replacement: expands to an expression statement that does nothing */
353 # define assert_match(a,b,c) ((void)0)
/* The following helper is only compiled when SWD_BEST_OFF is defined. */
357 #if defined(SWD_BEST_OFF)
/*
 * better_match -- takes the match finder state plus pointers to a match
 * length and offset.  The compressor entry point calls it only when
 * swd->use_best_off is set.
 * NOTE(review): the body is not visible in this view; presumably it may
 * replace *m_len / *m_off with a cheaper candidate -- confirm.
 */
360 better_match ( const ucl_swd_p swd, ucl_uint *m_len, ucl_uint *m_off )
367 /***********************************************************************
369 ************************************************************************/
/*
 * ucl_nrv_99_compress -- compress in_len bytes from in into out.
 *
 *   in, in_len  input buffer and its size
 *   out         output buffer; the bit-buffer write pointer c->bb_op
 *               advances through it
 *   out_len     receives c->bb_op - out
 *   cb          optional progress callback; it is invoked with
 *               (0, 0, -1, user) before matching starts and with
 *               (textsize, codesize, 4, user) at the end
 *   level       1..10, index into swd_config[] (parameter line not
 *               visible in this view)
 *   conf        configuration copied into c->conf
 *   result      optional array of 16 ucl_uint statistics; a local buffer
 *               is used when it is NULL (parameter line not visible)
 *
 * Visible return values: UCL_E_INVALID_ARGUMENT for a level outside 1..10
 * or for unusable settings, UCL_E_OUT_OF_MEMORY next to the allocation of
 * swd.
 *
 * NOTE(review): many lines of this function (braces, several declarations,
 * preprocessor conditionals, error checks after find_match / init_match,
 * the final return) are not visible in this view; comments below describe
 * only what the visible statements establish.
 */
372 ucl_nrv_99_compress ( const ucl_bytep in, ucl_uint in_len,
373 ucl_bytep out, ucl_uintp out_len,
374 ucl_progress_callback_p cb,
376 const struct ucl_compress_config_p conf,
381 ucl_uint m_len, m_off;
/* all compressor state lives in a local object accessed through c */
382 UCL_COMPRESS_T c_buffer;
383 UCL_COMPRESS_T * const c = &c_buffer;
/*
 * In one configuration swd is a macro for the address of a local object.
 * NOTE(review): the declaration of the_swd and the alternative branch are
 * not visible; the ucl_alloc() assignment to swd further down can only
 * belong to the branch where swd is a real variable -- confirm.
 */
385 #if 1 && defined(SWD_USE_MALLOC)
387 # define swd (&the_swd)
/* fallback storage for the statistics when the caller passes no result array */
391 ucl_uint result_buffer[16];
/*
 * Members of struct swd_config_t (the definition is only partly visible).
 * The code below also reads try_lazy, max_lazy, max_chain and flags.
 */
397 ucl_uint good_length;
399 ucl_uint nice_length;
402 ucl_uint32 max_offset;
404 const struct swd_config_t *sc;
/*
 * One tuning row per compression level 1..10.
 * NOTE(review): presumably the columns are try_lazy, good_length,
 * max_lazy, nice_length, max_chain, flags, max_offset -- the member order
 * is not fully visible here, confirm against the struct definition.
 */
405 static const struct swd_config_t swd_config[10] = {
406 /* faster compression */
407 { 0, 0, 0, 8, 4, 0, 48*1024L },
408 { 0, 0, 0, 16, 8, 0, 48*1024L },
409 { 0, 0, 0, 32, 16, 0, 48*1024L },
410 { 1, 4, 4, 16, 16, 0, 48*1024L },
411 { 1, 8, 16, 32, 32, 0, 48*1024L },
412 { 1, 8, 16, 128, 128, 0, 48*1024L },
413 { 2, 8, 32, 128, 256, 0, 128*1024L },
414 { 2, 32, 128, F, 2048, 1, 128*1024L },
415 { 2, 32, 128, F, 2048, 1, 256*1024L },
416 { 2, F, F, F, 4096, 1, N }
417 /* max. compression */
/* reject levels that have no row in the table, then pick the row */
420 if (level < 1 || level > 10)
421 return UCL_E_INVALID_ARGUMENT;
422 sc = &swd_config[level - 1];
/* start from an all-zero compressor state */
424 memset(c, 0, sizeof(*c));
426 c->in_end = in + in_len;
428 if (cb && cb->callback)
/* statistics go to the caller's array if given, else to the local buffer */
431 c->result = result ? result : (ucl_uintp) result_buffer;
432 memset(c->result, 0, 16*sizeof(*c->result));
/* slots 0, 2 and 4 start at the maximum value; presumably they track minima -- confirm */
433 c->result[0] = c->result[2] = c->result[4] = UCL_UINT_MAX;
/*
 * c->conf is first filled with 0xff bytes and then overwritten from conf.
 * NOTE(review): the condition guarding the memcpy (presumably a NULL check
 * on conf) is not visible.
 */
435 memset(&c->conf, 0xff, sizeof(c->conf));
437 memcpy(&c->conf, conf, sizeof(c->conf));
/*
 * Bit buffer setup: either fixed arguments (0, 8) or the endianness and
 * size taken from the configuration; the selecting condition is not
 * visible.  The return below presumably follows a failed bbConfig().
 */
439 r = bbConfig(c, 0, 8);
441 r = bbConfig(c, c->conf.bb_endian, c->conf.bb_size);
443 return UCL_E_INVALID_ARGUMENT;
446 ii = c->ip; /* point to start of literal run */
/* heap allocation of the match finder state (see the note at the swd macro above) */
450 swd = (ucl_swd_p) ucl_alloc(1, ucl_sizeof(*swd));
452 return UCL_E_OUT_OF_MEMORY;
/* match length limit: F capped by the configured max_match */
454 swd->f = UCL_MIN(F, c->conf.max_match);
/* window size: N capped by the level's max_offset, or by conf.max_offset when one is set */
455 swd->n = UCL_MIN(N, sc->max_offset);
456 if (c->conf.max_offset != UCL_UINT_MAX)
457 swd->n = UCL_MIN(N, c->conf.max_offset);
458 if (in_len >= 256 && in_len < swd->n)
/*
 * NOTE(review): if swd was obtained from ucl_alloc() above, this early
 * return needs to release it first; no free is visible in this view --
 * confirm against the full source.
 */
460 if (swd->f < 8 || swd->n < 256)
461 return UCL_E_INVALID_ARGUMENT;
462 r = init_match(c,swd,NULL,0,sc->flags);
/* level-specific overrides are applied only when the table holds a positive value */
470 if (sc->max_chain > 0)
471 swd->max_chain = sc->max_chain;
472 if (sc->nice_length > 0)
473 swd->nice_length = sc->nice_length;
/* nice_length never exceeds the configured max_match */
474 if (c->conf.max_match < swd->nice_length)
475 swd->nice_length = c->conf.max_match;
/* initial progress notification; the guard for a missing callback is not visible */
478 (*c->cb->callback)(0,0,-1,c->cb->user);
/* prime the match finder */
481 r = find_match(c,swd,0,0);
490 c->codesize = c->bb_op - out;
/* consistency checks on buffer position, literal run bookkeeping and current byte */
495 assert(c->bp == c->ip - c->look);
499 assert(ii + lit == c->bp);
500 assert(swd->b_char == *(c->bp));
/*
 * Candidate rejected: too short, length 2 with an offset above
 * M2_MAX_OFFSET, or an offset beyond the configured maximum.  The visible
 * statements restore max_chain and advance the match finder by one byte.
 */
502 if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET))
503 || m_off > c->conf.max_offset)
507 swd->max_chain = sc->max_chain;
508 r = find_match(c,swd,1,0);
514 #if defined(SWD_BEST_OFF)
515 if (swd->use_best_off)
516 better_match(swd,&m_len,&m_off);
518 assert_match(swd,m_len,m_off);
520 /* shall we try a lazy match ? */
/* condition is true when lazy matching is disabled for this level, the match is already long enough, or it reuses the previous offset */
522 if (sc->try_lazy <= 0 || m_len >= sc->max_lazy || m_off == c->last_m_off)
530 /* yes, try a lazy match */
/* l1: coded-length figure of the match found at the current position */
531 l1 = len_of_coded_match(c,m_len,m_off);
/* look ahead at most try_lazy positions and never past the end of the current match */
533 max_ahead = UCL_MIN(sc->try_lazy, m_len - 1);
536 while (ahead < max_ahead && c->look > m_len)
/* two alternative max_chain settings; the reduced one (a quarter) is tied to m_len >= good_length */
538 if (m_len >= sc->good_length)
539 swd->max_chain = sc->max_chain >> 2;
541 swd->max_chain = sc->max_chain;
542 r = find_match(c,swd,1,0);
547 assert(ii + lit + ahead == c->bp);
551 #if defined(SWD_BEST_OFF)
552 if (swd->use_best_off)
553 better_match(swd,&c->m_len,&c->m_off);
/* l2: coded-length figure of the match found ahead bytes later */
555 l2 = len_of_coded_match(c,c->m_len,c->m_off);
/*
 * Weighted comparison between the current match (l1) and the later one
 * (l2), using the gain in covered bytes (factor 5) and the bytes skipped
 * (factor 9).  NOTE(review): only part of the branch taken when the test
 * succeeds is visible.
 */
559 if (l1 + (int)(ahead + c->m_len - m_len) * 5 > l2 + (int)(ahead) * 9)
565 assert_match(swd,c->m_len,c->m_off);
570 /* code previous run */
573 /* code shortened match */
574 code_match(c,ahead,m_off);
580 assert(ii + lit == c->bp);
582 goto lazy_match_done;
586 assert(ii + lit + ahead == c->bp);
/* code the chosen match, restore max_chain and move the match finder on */
593 code_match(c,m_len,m_off);
594 swd->max_chain = sc->max_chain;
595 r = find_match(c,swd,m_len,1+ahead);
601 /* store final run */
/* a prefix code for 0x1000000 is written near the end; presumably the end-of-stream marker -- confirm */
607 code_prefix_ss11(c, UCL_UINT32_C(0x1000000));
609 #elif defined(NRV2D) || defined(NRV2E)
610 code_prefix_ss12(c, UCL_UINT32_C(0x1000000));
/* all input must have been consumed; both size outputs are the distance the write pointer moved */
617 assert(c->textsize == in_len);
618 c->codesize = c->bb_op - out;
619 *out_len = c->bb_op - out;
/* final progress notification; the guard for a missing callback is not visible */
621 (*c->cb->callback)(c->textsize,c->codesize,4,c->cb->user);
/*
 * NOTE(review): match_bytes, lit_bytes and lazy are passed to %ld without
 * a cast, and the result[] entries to %d; their declared types are not
 * visible here.  If they are not long / int respectively the call has
 * undefined behaviour -- confirm, or confirm that this statement sits in
 * a disabled debug section.
 */
624 printf("%7ld %7ld -> %7ld %7ld %7ld %ld (max: %d %d %d)\n",
625 (long) c->textsize, (long) in_len, (long) c->codesize,
626 c->match_bytes, c->lit_bytes, c->lazy,
627 c->result[1], c->result[3], c->result[5]);
/* every input byte was coded either as a literal or as part of a match */
629 assert(c->lit_bytes + c->match_bytes == in_len);