Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
[linux-2.6.git] / lib / lz4 / lz4hc_compress.c
blobf344f76b6559620bf7ae3bdaaeb6ab25265a344e
1 /*
2 * LZ4 HC - High Compression Mode of LZ4
3 * Copyright (C) 2011-2012, Yann Collet.
4 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 * You can contact the author at :
30 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 * - LZ4 source repository : http://code.google.com/p/lz4/
33 * Changed for kernel use by:
34 * Chanho Min <chanho.min@lge.com>
37 #include <linux/module.h>
38 #include <linux/kernel.h>
39 #include <linux/lz4.h>
40 #include <asm/unaligned.h>
41 #include "lz4defs.h"
43 struct lz4hc_data {
44 const u8 *base;
45 HTYPE hashtable[HASHTABLESIZE];
46 u16 chaintable[MAXD];
47 const u8 *nexttoupdate;
48 } __attribute__((__packed__));
50 static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
52 memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
53 memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
55 #if LZ4_ARCH64
56 hc4->nexttoupdate = base + 1;
57 #else
58 hc4->nexttoupdate = base;
59 #endif
60 hc4->base = base;
61 return 1;
64 /* Update chains up to ip (excluded) */
65 static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
67 u16 *chaintable = hc4->chaintable;
68 HTYPE *hashtable = hc4->hashtable;
69 #if LZ4_ARCH64
70 const BYTE * const base = hc4->base;
71 #else
72 const int base = 0;
73 #endif
75 while (hc4->nexttoupdate < ip) {
76 const u8 *p = hc4->nexttoupdate;
77 size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
78 if (delta > MAX_DISTANCE)
79 delta = MAX_DISTANCE;
80 chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
81 hashtable[HASH_VALUE(p)] = (p) - base;
82 hc4->nexttoupdate++;
86 static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
87 const u8 *const matchlimit)
89 const u8 *p1t = p1;
91 while (p1t < matchlimit - (STEPSIZE - 1)) {
92 #if LZ4_ARCH64
93 u64 diff = A64(p2) ^ A64(p1t);
94 #else
95 u32 diff = A32(p2) ^ A32(p1t);
96 #endif
97 if (!diff) {
98 p1t += STEPSIZE;
99 p2 += STEPSIZE;
100 continue;
102 p1t += LZ4_NBCOMMONBYTES(diff);
103 return p1t - p1;
105 #if LZ4_ARCH64
106 if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
107 p1t += 4;
108 p2 += 4;
110 #endif
112 if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
113 p1t += 2;
114 p2 += 2;
116 if ((p1t < matchlimit) && (*p2 == *p1t))
117 p1t++;
118 return p1t - p1;
121 static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
122 const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
124 u16 *const chaintable = hc4->chaintable;
125 HTYPE *const hashtable = hc4->hashtable;
126 const u8 *ref;
127 #if LZ4_ARCH64
128 const BYTE * const base = hc4->base;
129 #else
130 const int base = 0;
131 #endif
132 int nbattempts = MAX_NB_ATTEMPTS;
133 size_t repl = 0, ml = 0;
134 u16 delta;
136 /* HC4 match finder */
137 lz4hc_insert(hc4, ip);
138 ref = hashtable[HASH_VALUE(ip)] + base;
140 /* potential repetition */
141 if (ref >= ip-4) {
142 /* confirmed */
143 if (A32(ref) == A32(ip)) {
144 delta = (u16)(ip-ref);
145 repl = ml = lz4hc_commonlength(ip + MINMATCH,
146 ref + MINMATCH, matchlimit) + MINMATCH;
147 *matchpos = ref;
149 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
152 while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
153 nbattempts--;
154 if (*(ref + ml) == *(ip + ml)) {
155 if (A32(ref) == A32(ip)) {
156 size_t mlt =
157 lz4hc_commonlength(ip + MINMATCH,
158 ref + MINMATCH, matchlimit) + MINMATCH;
159 if (mlt > ml) {
160 ml = mlt;
161 *matchpos = ref;
165 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
168 /* Complete table */
169 if (repl) {
170 const BYTE *ptr = ip;
171 const BYTE *end;
172 end = ip + repl - (MINMATCH-1);
173 /* Pre-Load */
174 while (ptr < end - delta) {
175 chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
176 ptr++;
178 do {
179 chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
180 /* Head of chain */
181 hashtable[HASH_VALUE(ptr)] = (ptr) - base;
182 ptr++;
183 } while (ptr < end);
184 hc4->nexttoupdate = end;
187 return (int)ml;
190 static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
191 const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
192 const u8 **matchpos, const u8 **startpos)
194 u16 *const chaintable = hc4->chaintable;
195 HTYPE *const hashtable = hc4->hashtable;
196 #if LZ4_ARCH64
197 const BYTE * const base = hc4->base;
198 #else
199 const int base = 0;
200 #endif
201 const u8 *ref;
202 int nbattempts = MAX_NB_ATTEMPTS;
203 int delta = (int)(ip - startlimit);
205 /* First Match */
206 lz4hc_insert(hc4, ip);
207 ref = hashtable[HASH_VALUE(ip)] + base;
209 while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
210 && (nbattempts)) {
211 nbattempts--;
212 if (*(startlimit + longest) == *(ref - delta + longest)) {
213 if (A32(ref) == A32(ip)) {
214 const u8 *reft = ref + MINMATCH;
215 const u8 *ipt = ip + MINMATCH;
216 const u8 *startt = ip;
218 while (ipt < matchlimit-(STEPSIZE - 1)) {
219 #if LZ4_ARCH64
220 u64 diff = A64(reft) ^ A64(ipt);
221 #else
222 u32 diff = A32(reft) ^ A32(ipt);
223 #endif
225 if (!diff) {
226 ipt += STEPSIZE;
227 reft += STEPSIZE;
228 continue;
230 ipt += LZ4_NBCOMMONBYTES(diff);
231 goto _endcount;
233 #if LZ4_ARCH64
234 if ((ipt < (matchlimit - 3))
235 && (A32(reft) == A32(ipt))) {
236 ipt += 4;
237 reft += 4;
239 ipt += 2;
240 #endif
241 if ((ipt < (matchlimit - 1))
242 && (A16(reft) == A16(ipt))) {
243 reft += 2;
245 if ((ipt < matchlimit) && (*reft == *ipt))
246 ipt++;
247 _endcount:
248 reft = ref;
250 while ((startt > startlimit)
251 && (reft > hc4->base)
252 && (startt[-1] == reft[-1])) {
253 startt--;
254 reft--;
257 if ((ipt - startt) > longest) {
258 longest = (int)(ipt - startt);
259 *matchpos = reft;
260 *startpos = startt;
264 ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
266 return longest;
269 static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
270 int ml, const u8 *ref)
272 int length, len;
273 u8 *token;
275 /* Encode Literal length */
276 length = (int)(*ip - *anchor);
277 token = (*op)++;
278 if (length >= (int)RUN_MASK) {
279 *token = (RUN_MASK << ML_BITS);
280 len = length - RUN_MASK;
281 for (; len > 254 ; len -= 255)
282 *(*op)++ = 255;
283 *(*op)++ = (u8)len;
284 } else
285 *token = (length << ML_BITS);
287 /* Copy Literals */
288 LZ4_BLINDCOPY(*anchor, *op, length);
290 /* Encode Offset */
291 LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
293 /* Encode MatchLength */
294 len = (int)(ml - MINMATCH);
295 if (len >= (int)ML_MASK) {
296 *token += ML_MASK;
297 len -= ML_MASK;
298 for (; len > 509 ; len -= 510) {
299 *(*op)++ = 255;
300 *(*op)++ = 255;
302 if (len > 254) {
303 len -= 255;
304 *(*op)++ = 255;
306 *(*op)++ = (u8)len;
307 } else
308 *token += len;
310 /* Prepare next loop */
311 *ip += ml;
312 *anchor = *ip;
314 return 0;
317 static int lz4_compresshcctx(struct lz4hc_data *ctx,
318 const char *source,
319 char *dest,
320 int isize)
322 const u8 *ip = (const u8 *)source;
323 const u8 *anchor = ip;
324 const u8 *const iend = ip + isize;
325 const u8 *const mflimit = iend - MFLIMIT;
326 const u8 *const matchlimit = (iend - LASTLITERALS);
328 u8 *op = (u8 *)dest;
330 int ml, ml2, ml3, ml0;
331 const u8 *ref = NULL;
332 const u8 *start2 = NULL;
333 const u8 *ref2 = NULL;
334 const u8 *start3 = NULL;
335 const u8 *ref3 = NULL;
336 const u8 *start0;
337 const u8 *ref0;
338 int lastrun;
340 ip++;
342 /* Main Loop */
343 while (ip < mflimit) {
344 ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
345 if (!ml) {
346 ip++;
347 continue;
350 /* saved, in case we would skip too much */
351 start0 = ip;
352 ref0 = ref;
353 ml0 = ml;
354 _search2:
355 if (ip+ml < mflimit)
356 ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
357 ip + 1, matchlimit, ml, &ref2, &start2);
358 else
359 ml2 = ml;
360 /* No better match */
361 if (ml2 == ml) {
362 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
363 continue;
366 if (start0 < ip) {
367 /* empirical */
368 if (start2 < ip + ml0) {
369 ip = start0;
370 ref = ref0;
371 ml = ml0;
375 * Here, start0==ip
376 * First Match too small : removed
378 if ((start2 - ip) < 3) {
379 ml = ml2;
380 ip = start2;
381 ref = ref2;
382 goto _search2;
385 _search3:
387 * Currently we have :
388 * ml2 > ml1, and
389 * ip1+3 <= ip2 (usually < ip1+ml1)
391 if ((start2 - ip) < OPTIMAL_ML) {
392 int correction;
393 int new_ml = ml;
394 if (new_ml > OPTIMAL_ML)
395 new_ml = OPTIMAL_ML;
396 if (ip + new_ml > start2 + ml2 - MINMATCH)
397 new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
398 correction = new_ml - (int)(start2 - ip);
399 if (correction > 0) {
400 start2 += correction;
401 ref2 += correction;
402 ml2 -= correction;
406 * Now, we have start2 = ip+new_ml,
407 * with new_ml=min(ml, OPTIMAL_ML=18)
409 if (start2 + ml2 < mflimit)
410 ml3 = lz4hc_insertandgetwidermatch(ctx,
411 start2 + ml2 - 3, start2, matchlimit,
412 ml2, &ref3, &start3);
413 else
414 ml3 = ml2;
416 /* No better match : 2 sequences to encode */
417 if (ml3 == ml2) {
418 /* ip & ref are known; Now for ml */
419 if (start2 < ip+ml)
420 ml = (int)(start2 - ip);
422 /* Now, encode 2 sequences */
423 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
424 ip = start2;
425 lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
426 continue;
429 /* Not enough space for match 2 : remove it */
430 if (start3 < ip + ml + 3) {
432 * can write Seq1 immediately ==> Seq2 is removed,
433 * so Seq3 becomes Seq1
435 if (start3 >= (ip + ml)) {
436 if (start2 < ip + ml) {
437 int correction =
438 (int)(ip + ml - start2);
439 start2 += correction;
440 ref2 += correction;
441 ml2 -= correction;
442 if (ml2 < MINMATCH) {
443 start2 = start3;
444 ref2 = ref3;
445 ml2 = ml3;
449 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
450 ip = start3;
451 ref = ref3;
452 ml = ml3;
454 start0 = start2;
455 ref0 = ref2;
456 ml0 = ml2;
457 goto _search2;
460 start2 = start3;
461 ref2 = ref3;
462 ml2 = ml3;
463 goto _search3;
467 * OK, now we have 3 ascending matches; let's write at least
468 * the first one ip & ref are known; Now for ml
470 if (start2 < ip + ml) {
471 if ((start2 - ip) < (int)ML_MASK) {
472 int correction;
473 if (ml > OPTIMAL_ML)
474 ml = OPTIMAL_ML;
475 if (ip + ml > start2 + ml2 - MINMATCH)
476 ml = (int)(start2 - ip) + ml2
477 - MINMATCH;
478 correction = ml - (int)(start2 - ip);
479 if (correction > 0) {
480 start2 += correction;
481 ref2 += correction;
482 ml2 -= correction;
484 } else
485 ml = (int)(start2 - ip);
487 lz4_encodesequence(&ip, &op, &anchor, ml, ref);
489 ip = start2;
490 ref = ref2;
491 ml = ml2;
493 start2 = start3;
494 ref2 = ref3;
495 ml2 = ml3;
497 goto _search3;
500 /* Encode Last Literals */
501 lastrun = (int)(iend - anchor);
502 if (lastrun >= (int)RUN_MASK) {
503 *op++ = (RUN_MASK << ML_BITS);
504 lastrun -= RUN_MASK;
505 for (; lastrun > 254 ; lastrun -= 255)
506 *op++ = 255;
507 *op++ = (u8) lastrun;
508 } else
509 *op++ = (lastrun << ML_BITS);
510 memcpy(op, anchor, iend - anchor);
511 op += iend - anchor;
512 /* End */
513 return (int) (((char *)op) - dest);
516 int lz4hc_compress(const unsigned char *src, size_t src_len,
517 unsigned char *dst, size_t *dst_len, void *wrkmem)
519 int ret = -1;
520 int out_len = 0;
522 struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
523 lz4hc_init(hc4, (const u8 *)src);
524 out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
525 (char *)dst, (int)src_len);
527 if (out_len < 0)
528 goto exit;
530 *dst_len = out_len;
531 return 0;
533 exit:
534 return ret;
536 EXPORT_SYMBOL(lz4hc_compress);
538 MODULE_LICENSE("Dual BSD/GPL");
539 MODULE_DESCRIPTION("LZ4HC compressor");