1 /* lzo1a.c -- implementation of the LZO1A algorithm
3 This file is part of the LZO real-time data compression library.
5 Copyright (C) 1996-2015 Markus Franz Xaver Johannes Oberhumer
8 The LZO library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of
11 the License, or (at your option) any later version.
13 The LZO library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with the LZO library; see the file COPYING.
20 If not, write to the Free Software Foundation, Inc.,
21 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Markus F.X.J. Oberhumer
24 <markus@oberhumer.com>
25 http://www.oberhumer.com/opensource/lzo/
30 #include <lzo/lzo1a.h>
33 /***********************************************************************
34 // The next two defines can be changed to customize LZO1A.
35 // The default version is LZO1A-5/1.
36 ************************************************************************/
38 /* run bits (3 - 5) - the compressor and the decompressor
39 * must use the same value. */
44 /* compression level (1 - 9) - this only affects the compressor.
45 * 1 is fastest, 9 is best compression ratio
48 # define CLEVEL 1 /* fastest by default */
52 /* Collect statistics */
53 #if 0 && !defined(LZO_COLLECT_STATS)
54 # define LZO_COLLECT_STATS 1
58 /***********************************************************************
59 // You should not have to change anything below this line.
60 ************************************************************************/
62 /* check configuration */
63 #if (RBITS < 3 || RBITS > 5)
64 # error "invalid RBITS"
66 #if (CLEVEL < 1 || CLEVEL > 9)
67 # error "invalid CLEVEL"
71 /***********************************************************************
72 // internal configuration
73 // all of these affect compression only
74 ************************************************************************/
76 /* choose the hashing strategy */
78 #define LZO_HASH LZO_HASH_LZO_INCREMENTAL_A
80 #define D_INDEX1(d,p) d = DM(DMUL(0x21,DX2(p,5,5)) >> 5)
81 #define D_INDEX2(d,p) d = d ^ D_MASK
87 /* check other constants */
88 #if (LBITS < 5 || LBITS > 8)
89 # error "invalid LBITS"
93 #if (LZO_COLLECT_STATS)
94 static lzo1a_stats_t lzo_statistics
;
95 lzo1a_stats_t
*lzo1a_stats
= &lzo_statistics
;
96 # define lzo_stats lzo1a_stats
100 /***********************************************************************
101 // get algorithm info, return memory required for compression
102 ************************************************************************/
104 LZO_EXTERN(lzo_uint
) lzo1a_info ( int *rbits
, int *clevel
);
107 lzo1a_info ( int *rbits
, int *clevel
)
113 return D_SIZE
* lzo_sizeof(lzo_bytep
);
117 /***********************************************************************
118 // LZO1A decompress a block of data.
120 // Could be easily translated into assembly code.
121 ************************************************************************/
124 lzo1a_decompress ( const lzo_bytep in
, lzo_uint in_len
,
125 lzo_bytep out
, lzo_uintp out_len
,
131 const lzo_bytep m_pos
;
132 const lzo_bytep
const ip_end
= in
+ in_len
;
140 t
= *ip
++; /* get marker */
141 LZO_STATS(lzo_stats
->marker
[t
]++);
143 if (t
== 0) /* a R0 literal run */
146 if (t
>= R0FAST
- R0MIN
) /* a long R0 run */
154 t
= 256u << ((unsigned) t
);
156 /* help the optimizer */
158 do tt
<<= 1; while (--t
> 0);
168 else if (t
< R0MIN
) /* a short literal run */
173 /* after a literal a match must follow */
176 t
= *ip
++; /* get R1 marker */
180 /* R1 match - a context sensitive 3 byte match + 1 byte literal */
181 assert((t
& OMASK
) == t
);
182 m_pos
= op
- MIN_OFFSET
;
183 m_pos
-= t
| (((lzo_uint
) *ip
++) << OBITS
);
184 assert(m_pos
>= out
); assert(m_pos
< op
);
194 /* get match offset */
195 m_pos
= op
- MIN_OFFSET
;
196 m_pos
-= (t
& OMASK
) | (((lzo_uint
) *ip
++) << OBITS
);
197 assert(m_pos
>= out
); assert(m_pos
< op
);
200 if (t
< ((MSIZE
- 1) << OBITS
)) /* a short match */
205 MEMCPY_DS(op
,m_pos
,t
);
207 else /* a long match */
210 t
= (MIN_MATCH_LONG
- THRESHOLD
) + ((lzo_uint
)(*ip
++) & LMASK
);
212 t
= (MIN_MATCH_LONG
- THRESHOLD
) + (lzo_uint
)(*ip
++);
216 MEMCPY_DS(op
,m_pos
,t
);
218 /* a very short literal following a long match */
228 *out_len
= pd(op
, out
);
230 /* the next line is the only check in the decompressor */
231 return (ip
== ip_end
? LZO_E_OK
:
232 (ip
< ip_end
? LZO_E_INPUT_NOT_CONSUMED
: LZO_E_INPUT_OVERRUN
));
237 /***********************************************************************
238 // LZO1A compress a block of data.
240 // I apologize for the spaghetti code, but it really helps the optimizer.
241 ************************************************************************/
243 #include "lzo1a_cr.ch"
246 do_compress ( const lzo_bytep in
, lzo_uint in_len
,
247 lzo_bytep out
, lzo_uintp out_len
,
251 #if defined(__LZO_HASH_INCREMENTAL)
254 const lzo_bytep m_pos
;
256 const lzo_bytep
const ip_end
= in
+in_len
- DVAL_LEN
- MIN_MATCH_LONG
;
257 const lzo_bytep
const in_end
= in
+in_len
- DVAL_LEN
;
259 lzo_dict_p
const dict
= (lzo_dict_p
) wrkmem
;
260 const lzo_bytep r1
= ip_end
; /* pointer for R1 match (none yet) */
262 const lzo_bytep im
= ip_end
; /* pointer to last match start */
266 const lzo_bytep m_pos_sav
;
271 ii
= ip
; /* point to start of current literal run */
273 /* init dictionary */
274 #if (LZO_DETERMINISTIC)
275 BZERO8_PTR(wrkmem
,sizeof(lzo_dict_t
),D_SIZE
);
278 DVAL_FIRST(dv
,ip
); UPDATE_D(dict
,0,dv
,ip
,in
); ip
++;
282 LZO_DEFINE_UNINITIALIZED_VAR(lzo_uint
, m_off
, 0);
286 GINDEX(m_pos
,m_off
,dict
,dindex
,in
);
287 if (LZO_CHECK_MPOS_NON_DET(m_pos
,m_off
,in
,ip
,MAX_OFFSET
))
289 if (m_pos
[0] == ip
[0] && m_pos
[1] == ip
[1] && m_pos
[2] == ip
[2])
292 GINDEX(m_pos
,m_off
,dict
,dindex
,in
);
293 if (LZO_CHECK_MPOS_NON_DET(m_pos
,m_off
,in
,ip
,MAX_OFFSET
))
295 if (m_pos
[0] == ip
[0] && m_pos
[1] == ip
[1] && m_pos
[2] == ip
[2])
300 UPDATE_I(dict
,0,dindex
,ip
,in
);
306 UPDATE_I(dict
,0,dindex
,ip
,in
);
307 #if !defined(NDEBUG) && (LZO_DICT_USE_PTR)
308 assert(m_pos
== NULL
|| m_pos
>= in
);
313 /* we have found a match (of at least length 3) */
315 #if !defined(NDEBUG) && !(LZO_DICT_USE_PTR)
316 assert((m_pos_sav
= ip
- m_off
) == (m_pos
- 3));
322 /* 1) store the current literal run */
325 lzo_uint t
= pd(ip
,ii
);
327 if (ip
- r1
== MIN_MATCH
+ 1)
329 /* Code a context sensitive R1 match.
330 * This is tricky and somewhat difficult to explain:
331 * multiplex a literal run of length 1 into the previous
332 * short match of length MIN_MATCH.
334 * - after a short run a match MUST follow
335 * - therefore the value m = 000 in the mmmooooo marker is free
336 * - use 000ooooo to indicate a MIN_MATCH match (this
337 * is already coded) plus a 1 byte literal
340 /* modify marker byte */
341 assert((op
[-2] >> OBITS
) == (MIN_MATCH
- THRESHOLD
));
343 assert((op
[-2] >> OBITS
) == 0);
346 LZO_STATS(lzo_stats
->r1_matches
++);
347 r1
= ip
; /* set new R1 pointer */
351 /* inline the copying of a short run */
353 if (t
< (1 << (8-LBITS
)) && ii
- im
>= MIN_MATCH_LONG
)
355 /* Code a very short literal run into the
356 * previous long match length byte.
358 LZO_STATS(lzo_stats
->lit_runs_after_long_match
++);
359 LZO_STATS(lzo_stats
->lit_run_after_long_match
[t
]++);
360 assert(ii
- im
<= MAX_MATCH_LONG
);
361 assert((op
[-1] >> LBITS
) == 0);
362 op
[-1] = LZO_BYTE(op
[-1] | (t
<< LBITS
));
363 MEMCPY_DS(op
, ii
, t
);
368 LZO_STATS(lzo_stats
->lit_runs
++);
369 LZO_STATS(lzo_stats
->lit_run
[t
]++);
371 MEMCPY_DS(op
, ii
, t
);
372 r1
= ip
; /* set new R1 pointer */
377 /* inline the copying of a short R0 run */
378 LZO_STATS(lzo_stats
->r0short_runs
++);
379 *op
++ = 0; *op
++ = LZO_BYTE(t
- R0MIN
);
380 MEMCPY_DS(op
, ii
, t
);
381 r1
= ip
; /* set new R1 pointer */
384 op
= store_run(op
,ii
,t
);
391 /* 2) compute match len */
392 ii
= ip
; /* point to start of current match */
394 /* we already matched MIN_MATCH bytes,
395 * m_pos also already advanced MIN_MATCH bytes */
399 /* try to match another MIN_MATCH_LONG - MIN_MATCH bytes
400 * to see if we get a long match */
402 #define PS *m_pos++ != *ip++
404 #if (MIN_MATCH_LONG - MIN_MATCH == 2) /* MBITS == 2 */
406 #elif (MIN_MATCH_LONG - MIN_MATCH == 6) /* MBITS == 3 */
407 if (PS
|| PS
|| PS
|| PS
|| PS
|| PS
)
408 #elif (MIN_MATCH_LONG - MIN_MATCH == 14) /* MBITS == 4 */
409 if (PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
410 PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
)
411 #elif (MIN_MATCH_LONG - MIN_MATCH == 30) /* MBITS == 5 */
412 if (PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
413 PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
414 PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
|| PS
||
415 PS
|| PS
|| PS
|| PS
|| PS
|| PS
)
417 # error "MBITS not yet implemented"
420 /* we've found a short match */
423 /* 2a) compute match parameters */
424 assert(ip
-m_pos
== (int)m_off
);
425 --ip
; /* ran one too far, point back to non-match */
427 assert(m_len
>= MIN_MATCH_SHORT
);
428 assert(m_len
<= MAX_MATCH_SHORT
);
429 assert(m_off
>= MIN_OFFSET
);
430 assert(m_off
<= MAX_OFFSET
);
431 assert(ii
-m_off
== m_pos_sav
);
432 assert(lzo_memcmp(m_pos_sav
,ii
,m_len
) == 0);
435 /* 2b) code a short match */
436 /* code short match len + low offset bits */
437 *op
++ = LZO_BYTE(((m_len
- THRESHOLD
) << OBITS
) |
439 /* code high offset bits */
440 *op
++ = LZO_BYTE(m_off
>> OBITS
);
443 #if (LZO_COLLECT_STATS)
444 lzo_stats
->short_matches
++;
445 lzo_stats
->short_match
[m_len
]++;
447 lzo_stats
->short_match_offset_osize
[m_len
]++;
449 lzo_stats
->short_match_offset_256
[m_len
]++;
451 lzo_stats
->short_match_offset_1024
[m_len
]++;
455 /* 2c) Insert phrases (beginning with ii+1) into the dictionary. */
457 #define SI /* nothing */
458 #define DI ++ii; DVAL_NEXT(dv,ii); UPDATE_D(dict,0,dv,ii,in);
459 #define XI assert(ii < ip); ii = ip; DVAL_FIRST(dv,(ip));
461 #if (CLEVEL == 9) || (CLEVEL >= 7 && MBITS <= 4) || (CLEVEL >= 5 && MBITS <= 3)
462 /* Insert the whole match (ii+1)..(ip-1) into dictionary. */
466 UPDATE_D(dict
,0,dv
,ii
,in
);
482 /* we've found a long match - see how far we can still go */
486 assert(ip
<= in_end
);
487 assert(ii
== ip
- MIN_MATCH_LONG
);
489 if (pd(in_end
,ip
) <= (MAX_MATCH_LONG
- MIN_MATCH_LONG
))
493 end
= ip
+ (MAX_MATCH_LONG
- MIN_MATCH_LONG
);
494 assert(end
< in_end
);
497 while (ip
< end
&& *m_pos
== *ip
)
499 assert(ip
<= in_end
);
501 /* 2a) compute match parameters */
503 assert(m_len
>= MIN_MATCH_LONG
);
504 assert(m_len
<= MAX_MATCH_LONG
);
505 assert(m_off
>= MIN_OFFSET
);
506 assert(m_off
<= MAX_OFFSET
);
507 assert(ii
-m_off
== m_pos_sav
);
508 assert(lzo_memcmp(m_pos_sav
,ii
,m_len
) == 0);
509 assert(pd(ip
,m_pos
) == m_off
);
512 /* 2b) code the long match */
513 /* code long match flag + low offset bits */
514 *op
++ = LZO_BYTE(((MSIZE
- 1) << OBITS
) | (m_off
& OMASK
));
515 /* code high offset bits */
516 *op
++ = LZO_BYTE(m_off
>> OBITS
);
518 *op
++ = LZO_BYTE(m_len
- MIN_MATCH_LONG
);
521 #if (LZO_COLLECT_STATS)
522 lzo_stats
->long_matches
++;
523 lzo_stats
->long_match
[m_len
]++;
527 /* 2c) Insert phrases (beginning with ii+1) into the dictionary. */
529 /* Insert the whole match (ii+1)..(ip-1) into dictionary. */
530 /* This is not recommended because it is slow. */
534 UPDATE_D(dict
,0,dv
,ii
,in
);
540 SI DI DI DI DI DI DI DI DI XI
542 SI DI DI DI DI DI DI DI XI
544 SI DI DI DI DI DI DI XI
558 /* ii now points to the start of the next literal run */
562 } while (ip
< ip_end
);
564 assert(ip
<= in_end
);
567 #if defined(LZO_RETURN_IF_NOT_COMPRESSIBLE)
568 /* return -1 if op == out to indicate that we
569 * couldn't compress and didn't copy anything.
574 return LZO_E_NOT_COMPRESSIBLE
;
578 /* store the final literal run */
579 if (pd(in_end
+DVAL_LEN
,ii
) > 0)
580 op
= store_run(op
,ii
,pd(in_end
+DVAL_LEN
,ii
));
582 *out_len
= pd(op
, out
);
583 return 0; /* compression went ok */
587 /***********************************************************************
588 // LZO1A compress public entry point.
589 ************************************************************************/
592 lzo1a_compress ( const lzo_bytep in
, lzo_uint in_len
,
593 lzo_bytep out
, lzo_uintp out_len
,
599 #if (LZO_COLLECT_STATS)
600 lzo_memset(lzo_stats
,0,sizeof(*lzo_stats
));
601 lzo_stats
->rbits
= RBITS
;
602 lzo_stats
->clevel
= CLEVEL
;
603 lzo_stats
->dbits
= DBITS
;
604 lzo_stats
->lbits
= LBITS
;
605 lzo_stats
->min_match_short
= MIN_MATCH_SHORT
;
606 lzo_stats
->max_match_short
= MAX_MATCH_SHORT
;
607 lzo_stats
->min_match_long
= MIN_MATCH_LONG
;
608 lzo_stats
->max_match_long
= MAX_MATCH_LONG
;
609 lzo_stats
->min_offset
= MIN_OFFSET
;
610 lzo_stats
->max_offset
= MAX_OFFSET
;
611 lzo_stats
->r0min
= R0MIN
;
612 lzo_stats
->r0fast
= R0FAST
;
613 lzo_stats
->r0max
= R0MAX
;
614 lzo_stats
->in_len
= in_len
;
618 /* don't try to compress a block that's too short */
621 else if (in_len
<= MIN_MATCH_LONG
+ DVAL_LEN
+ 1)
623 #if defined(LZO_RETURN_IF_NOT_COMPRESSIBLE)
624 r
= LZO_E_NOT_COMPRESSIBLE
;
626 *out_len
= pd(store_run(out
,in
,in_len
), out
);
630 r
= do_compress(in
,in_len
,out
,out_len
,wrkmem
);
633 #if (LZO_COLLECT_STATS)
634 lzo_stats
->short_matches
-= lzo_stats
->r1_matches
;
635 lzo_stats
->short_match
[MIN_MATCH
] -= lzo_stats
->r1_matches
;
636 lzo_stats
->out_len
= *out_len
;
643 /* vim:set ts=4 sw=4 et: */