1 /* lzo1a.c -- implementation of the LZO1A algorithm
3 This file is part of the LZO real-time data compression library.
5 Copyright (C) 2011 Markus Franz Xaver Johannes Oberhumer
6 Copyright (C) 2010 Markus Franz Xaver Johannes Oberhumer
7 Copyright (C) 2009 Markus Franz Xaver Johannes Oberhumer
8 Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer
9 Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer
10 Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer
11 Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer
12 Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer
13 Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer
14 Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer
15 Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer
16 Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer
17 Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer
18 Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer
19 Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer
20 Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer
23 The LZO library is free software; you can redistribute it and/or
24 modify it under the terms of the GNU General Public License as
25 published by the Free Software Foundation; either version 2 of
26 the License, or (at your option) any later version.
28 The LZO library is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU General Public License for more details.
33 You should have received a copy of the GNU General Public License
34 along with the LZO library; see the file COPYING.
35 If not, write to the Free Software Foundation, Inc.,
36 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
38 Markus F.X.J. Oberhumer
39 <markus@oberhumer.com>
40 http://www.oberhumer.com/opensource/lzo/
45 #include "lzo/lzo1a.h"
48 /***********************************************************************
49 // The next two defines can be changed to customize LZO1A.
50 // The default version is LZO1A-5/1.
51 ************************************************************************/
53 /* run bits (3 - 5) - the compressor and the decompressor
54 * must use the same value. */
59 /* compression level (1 - 9) - this only affects the compressor.
60 * 1 is fastest, 9 is best compression ratio
63 # define CLEVEL 1 /* fastest by default */
67 /* Collect statistics */
68 #if 0 && !defined(LZO_COLLECT_STATS)
69 # define LZO_COLLECT_STATS 1
73 /***********************************************************************
74 // You should not have to change anything below this line.
75 ************************************************************************/
77 /* check configuration */
78 #if (RBITS < 3 || RBITS > 5)
79 # error "invalid RBITS"
81 #if (CLEVEL < 1 || CLEVEL > 9)
82 # error "invalid CLEVEL"
86 /***********************************************************************
87 // internal configuration
88 // all of these affect compression only
89 ************************************************************************/
91 /* choose the hashing strategy */
93 #define LZO_HASH LZO_HASH_LZO_INCREMENTAL_A
95 #define D_INDEX1(d,p) d = DM(DMUL(0x21,DX2(p,5,5)) >> 5)
96 #define D_INDEX2(d,p) d = d ^ D_MASK
102 /* check other constants */
103 #if (LBITS < 5 || LBITS > 8)
104 # error "invalid LBITS"
108 #if (LZO_COLLECT_STATS)
109 static lzo1a_stats_t lzo_statistics
;
110 lzo1a_stats_t
*lzo1a_stats
= &lzo_statistics
;
111 # define lzo_stats lzo1a_stats
115 /***********************************************************************
116 // get algorithm info, return memory required for compression
117 ************************************************************************/
119 LZO_EXTERN(lzo_uint
) lzo1a_info ( int *rbits
, int *clevel
);
122 lzo1a_info ( int *rbits
, int *clevel
)
128 return D_SIZE
* lzo_sizeof(lzo_bytep
);
132 /***********************************************************************
133 // LZO1A decompress a block of data.
135 // Could be easily translated into assembly code.
136 ************************************************************************/
139 lzo1a_decompress ( const lzo_bytep in
, lzo_uint in_len
,
140 lzo_bytep out
, lzo_uintp out_len
,
143 register lzo_bytep op
;
144 register const lzo_bytep ip
;
146 register const lzo_bytep m_pos
;
147 const lzo_bytep
const ip_end
= in
+ in_len
;
155 t
= *ip
++; /* get marker */
156 LZO_STATS(lzo_stats
->marker
[t
]++);
158 if (t
== 0) /* a R0 literal run */
161 if (t
>= R0FAST
- R0MIN
) /* a long R0 run */
169 t
= 256u << ((unsigned) t
);
171 /* help the optimizer */
173 do tt
<<= 1; while (--t
> 0);
183 else if (t
< R0MIN
) /* a short literal run */
188 /* after a literal a match must follow */
191 t
= *ip
++; /* get R1 marker */
195 /* R1 match - a context sensitive 3 byte match + 1 byte literal */
196 assert((t
& OMASK
) == t
);
197 m_pos
= op
- MIN_OFFSET
;
198 m_pos
-= t
| (((lzo_uint
) *ip
++) << OBITS
);
199 assert(m_pos
>= out
); assert(m_pos
< op
);
209 /* get match offset */
210 m_pos
= op
- MIN_OFFSET
;
211 m_pos
-= (t
& OMASK
) | (((lzo_uint
) *ip
++) << OBITS
);
212 assert(m_pos
>= out
); assert(m_pos
< op
);
215 if (t
< ((MSIZE
- 1) << OBITS
)) /* a short match */
220 MEMCPY_DS(op
,m_pos
,t
);
222 else /* a long match */
225 t
= (MIN_MATCH_LONG
- THRESHOLD
) + ((lzo_uint
)(*ip
++) & LMASK
);
227 t
= (MIN_MATCH_LONG
- THRESHOLD
) + (lzo_uint
)(*ip
++);
231 MEMCPY_DS(op
,m_pos
,t
);
233 /* a very short literal following a long match */
243 *out_len
= pd(op
, out
);
245 /* the next line is the only check in the decompressor */
246 return (ip
== ip_end
? LZO_E_OK
:
247 (ip
< ip_end
? LZO_E_INPUT_NOT_CONSUMED
: LZO_E_INPUT_OVERRUN
));
252 /***********************************************************************
253 // LZO1A compress a block of data.
255 // I apologize for the spaghetti code, but it really helps the optimizer.
256 ************************************************************************/
258 #include "lzo1a_cr.ch"
261 do_compress ( const lzo_bytep in
, lzo_uint in_len
,
262 lzo_bytep out
, lzo_uintp out_len
,
265 register const lzo_bytep ip
;
266 #if defined(__LZO_HASH_INCREMENTAL)
269 const lzo_bytep m_pos
;
271 const lzo_bytep
const ip_end
= in
+in_len
- DVAL_LEN
- MIN_MATCH_LONG
;
272 const lzo_bytep
const in_end
= in
+in_len
- DVAL_LEN
;
274 lzo_dict_p
const dict
= (lzo_dict_p
) wrkmem
;
275 const lzo_bytep r1
= ip_end
; /* pointer for R1 match (none yet) */
277 const lzo_bytep im
= ip_end
; /* pointer to last match start */
281 const lzo_bytep m_pos_sav
;
286 ii
= ip
; /* point to start of current literal run */
288 /* init dictionary */
289 #if (LZO_DETERMINISTIC)
290 BZERO8_PTR(wrkmem
,sizeof(lzo_dict_t
),D_SIZE
);
293 DVAL_FIRST(dv
,ip
); UPDATE_D(dict
,0,dv
,ip
,in
); ip
++;
297 LZO_DEFINE_UNINITIALIZED_VAR(lzo_uint
, m_off
, 0);
301 GINDEX(m_pos
,m_off
,dict
,dindex
,in
);
302 if (LZO_CHECK_MPOS_NON_DET(m_pos
,m_off
,in
,ip
,MAX_OFFSET
))
304 if (m_pos
[0] == ip
[0] && m_pos
[1] == ip
[1] && m_pos
[2] == ip
[2])
307 GINDEX(m_pos
,m_off
,dict
,dindex
,in
);
308 if (LZO_CHECK_MPOS_NON_DET(m_pos
,m_off
,in
,ip
,MAX_OFFSET
))
310 if (m_pos
[0] == ip
[0] && m_pos
[1] == ip
[1] && m_pos
[2] == ip
[2])
315 UPDATE_I(dict
,0,dindex
,ip
,in
);
321 UPDATE_I(dict
,0,dindex
,ip
,in
);
322 #if !defined(NDEBUG) && (LZO_DICT_USE_PTR)
323 assert(m_pos
== NULL
|| m_pos
>= in
);
328 /* we have found a match (of at least length 3) */
330 #if !defined(NDEBUG) && !(LZO_DICT_USE_PTR)
331 assert((m_pos_sav
= ip
- m_off
) == (m_pos
- 3));
337 /* 1) store the current literal run */
340 lzo_uint t
= pd(ip
,ii
);
342 if (ip
- r1
== MIN_MATCH
+ 1)
344 /* Code a context sensitive R1 match.
345 * This is tricky and somewhat difficult to explain:
346 * multiplex a literal run of length 1 into the previous
347 * short match of length MIN_MATCH.
349 * - after a short run a match MUST follow
350 * - therefore the value m = 000 in the mmmooooo marker is free
351 * - use 000ooooo to indicate a MIN_MATCH match (this
352 * is already coded) plus a 1 byte literal
355 /* modify marker byte */
356 assert((op
[-2] >> OBITS
) == (MIN_MATCH
- THRESHOLD
));
358 assert((op
[-2] >> OBITS
) == 0);
361 LZO_STATS(lzo_stats
->r1_matches
++);
362 r1
= ip
; /* set new R1 pointer */
366 /* inline the copying of a short run */
368 if (t
< (1 << (8-LBITS
)) && ii
- im
>= MIN_MATCH_LONG
)
370 /* Code a very short literal run into the
371 * previous long match length byte.
373 LZO_STATS(lzo_stats
->lit_runs_after_long_match
++);
374 LZO_STATS(lzo_stats
->lit_run_after_long_match
[t
]++);
375 assert(ii
- im
<= MAX_MATCH_LONG
);
376 assert((op
[-1] >> LBITS
) == 0);
377 op
[-1] |= t
<< LBITS
;
378 MEMCPY_DS(op
, ii
, t
);
383 LZO_STATS(lzo_stats
->lit_runs
++);
384 LZO_STATS(lzo_stats
->lit_run
[t
]++);
386 MEMCPY_DS(op
, ii
, t
);
387 r1
= ip
; /* set new R1 pointer */
392 /* inline the copying of a short R0 run */
393 LZO_STATS(lzo_stats
->r0short_runs
++);
394 *op
++ = 0; *op
++ = LZO_BYTE(t
- R0MIN
);
395 MEMCPY_DS(op
, ii
, t
);
396 r1
= ip
; /* set new R1 pointer */
399 op
= store_run(op
,ii
,t
);
406 /* 2) compute match len */
407 ii
= ip
; /* point to start of current match */
409 /* we already matched MIN_MATCH bytes,
410 * m_pos also already advanced MIN_MATCH bytes */
414 /* try to match another MIN_MATCH_LONG - MIN_MATCH bytes
415 * to see if we get a long match */
417 #define PS *m_pos++ != *ip++
419 #if (MIN_MATCH_LONG - MIN_MATCH == 2) /* MBITS == 2 */
421 #elif (MIN_MATCH_LONG - MIN_MATCH == 6) /* MBITS == 3 */
422 if (PS
|| PS
|| PS
|| PS
|| PS
|| PS
)
423 #elif (MIN_MATCH_LONG - MIN_MATCH == 14) /* MBITS == 4 */
424 if (PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
425 PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
)
426 #elif (MIN_MATCH_LONG - MIN_MATCH == 30) /* MBITS == 5 */
427 if (PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
428 PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
429 PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
430 PS
|| PS
|| PS
|| PS
|| PS
|| PS
)
432 # error "MBITS not yet implemented"
435 /* we've found a short match */
438 /* 2a) compute match parameters */
439 assert(ip
-m_pos
== (int)m_off
);
440 --ip
; /* ran one too far, point back to non-match */
442 assert(m_len
>= MIN_MATCH_SHORT
);
443 assert(m_len
<= MAX_MATCH_SHORT
);
444 assert(m_off
>= MIN_OFFSET
);
445 assert(m_off
<= MAX_OFFSET
);
446 assert(ii
-m_off
== m_pos_sav
);
447 assert(lzo_memcmp(m_pos_sav
,ii
,m_len
) == 0);
450 /* 2b) code a short match */
451 /* code short match len + low offset bits */
452 *op
++ = LZO_BYTE(((m_len
- THRESHOLD
) << OBITS
) |
454 /* code high offset bits */
455 *op
++ = LZO_BYTE(m_off
>> OBITS
);
458 #if (LZO_COLLECT_STATS)
459 lzo_stats
->short_matches
++;
460 lzo_stats
->short_match
[m_len
]++;
462 lzo_stats
->short_match_offset_osize
[m_len
]++;
464 lzo_stats
->short_match_offset_256
[m_len
]++;
466 lzo_stats
->short_match_offset_1024
[m_len
]++;
470 /* 2c) Insert phrases (beginning with ii+1) into the dictionary. */
472 #define SI /* nothing */
473 #define DI ++ii; DVAL_NEXT(dv,ii); UPDATE_D(dict,0,dv,ii,in);
474 #define XI assert(ii < ip); ii = ip; DVAL_FIRST(dv,(ip));
476 #if (CLEVEL == 9) || (CLEVEL >= 7 && MBITS <= 4) || (CLEVEL >= 5 && MBITS <= 3)
477 /* Insert the whole match (ii+1)..(ip-1) into dictionary. */
481 UPDATE_D(dict
,0,dv
,ii
,in
);
497 /* we've found a long match - see how far we can still go */
501 assert(ip
<= in_end
);
502 assert(ii
== ip
- MIN_MATCH_LONG
);
504 if (pd(in_end
,ip
) <= (MAX_MATCH_LONG
- MIN_MATCH_LONG
))
508 end
= ip
+ (MAX_MATCH_LONG
- MIN_MATCH_LONG
);
509 assert(end
< in_end
);
512 while (ip
< end
&& *m_pos
== *ip
)
514 assert(ip
<= in_end
);
516 /* 2a) compute match parameters */
518 assert(m_len
>= MIN_MATCH_LONG
);
519 assert(m_len
<= MAX_MATCH_LONG
);
520 assert(m_off
>= MIN_OFFSET
);
521 assert(m_off
<= MAX_OFFSET
);
522 assert(ii
-m_off
== m_pos_sav
);
523 assert(lzo_memcmp(m_pos_sav
,ii
,m_len
) == 0);
524 assert(pd(ip
,m_pos
) == m_off
);
527 /* 2b) code the long match */
528 /* code long match flag + low offset bits */
529 *op
++ = LZO_BYTE(((MSIZE
- 1) << OBITS
) | (m_off
& OMASK
));
530 /* code high offset bits */
531 *op
++ = LZO_BYTE(m_off
>> OBITS
);
533 *op
++ = LZO_BYTE(m_len
- MIN_MATCH_LONG
);
536 #if (LZO_COLLECT_STATS)
537 lzo_stats
->long_matches
++;
538 lzo_stats
->long_match
[m_len
]++;
542 /* 2c) Insert phrases (beginning with ii+1) into the dictionary. */
544 /* Insert the whole match (ii+1)..(ip-1) into dictionary. */
545 /* This is not recommended because it is slow. */
549 UPDATE_D(dict
,0,dv
,ii
,in
);
555 SI DI DI DI DI DI DI DI DI XI
557 SI DI DI DI DI DI DI DI XI
559 SI DI DI DI DI DI DI XI
573 /* ii now points to the start of the next literal run */
577 } while (ip
< ip_end
);
579 assert(ip
<= in_end
);
582 #if defined(LZO_RETURN_IF_NOT_COMPRESSIBLE)
583 /* return -1 if op == out to indicate that we
584 * couldn't compress and didn't copy anything.
589 return LZO_E_NOT_COMPRESSIBLE
;
593 /* store the final literal run */
594 if (pd(in_end
+DVAL_LEN
,ii
) > 0)
595 op
= store_run(op
,ii
,pd(in_end
+DVAL_LEN
,ii
));
597 *out_len
= pd(op
, out
);
598 return 0; /* compression went ok */
602 /***********************************************************************
603 // LZO1A compress public entry point.
604 ************************************************************************/
607 lzo1a_compress ( const lzo_bytep in
, lzo_uint in_len
,
608 lzo_bytep out
, lzo_uintp out_len
,
614 #if (LZO_COLLECT_STATS)
615 lzo_memset(lzo_stats
,0,sizeof(*lzo_stats
));
616 lzo_stats
->rbits
= RBITS
;
617 lzo_stats
->clevel
= CLEVEL
;
618 lzo_stats
->dbits
= DBITS
;
619 lzo_stats
->lbits
= LBITS
;
620 lzo_stats
->min_match_short
= MIN_MATCH_SHORT
;
621 lzo_stats
->max_match_short
= MAX_MATCH_SHORT
;
622 lzo_stats
->min_match_long
= MIN_MATCH_LONG
;
623 lzo_stats
->max_match_long
= MAX_MATCH_LONG
;
624 lzo_stats
->min_offset
= MIN_OFFSET
;
625 lzo_stats
->max_offset
= MAX_OFFSET
;
626 lzo_stats
->r0min
= R0MIN
;
627 lzo_stats
->r0fast
= R0FAST
;
628 lzo_stats
->r0max
= R0MAX
;
629 lzo_stats
->in_len
= in_len
;
633 /* don't try to compress a block that's too short */
636 else if (in_len
<= MIN_MATCH_LONG
+ DVAL_LEN
+ 1)
638 #if defined(LZO_RETURN_IF_NOT_COMPRESSIBLE)
639 r
= LZO_E_NOT_COMPRESSIBLE
;
641 *out_len
= pd(store_run(out
,in
,in_len
), out
);
645 r
= do_compress(in
,in_len
,out
,out_len
,wrkmem
);
648 #if (LZO_COLLECT_STATS)
649 lzo_stats
->short_matches
-= lzo_stats
->r1_matches
;
650 lzo_stats
->short_match
[MIN_MATCH
] -= lzo_stats
->r1_matches
;
651 lzo_stats
->out_len
= *out_len
;