s3: tests: Add new test_stream_dir_rename.sh test.
[Samba.git] / lib / compression / lzxpress.c
blob5e5e5baafc4593ab2f7c82719ac45e062a6c8e07
1 /*
2 * Copyright (C) Matthieu Suiche 2008
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the author nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
35 #include "replace.h"
36 #include "lzxpress.h"
37 #include "../lib/util/byteorder.h"
40 #define __CHECK_BYTES(__size, __index, __needed) do { \
41 if (unlikely(__index >= __size)) { \
42 return -1; \
43 } else { \
44 uint32_t __avail = __size - __index; \
45 if (unlikely(__needed > __avail)) { \
46 return -1; \
47 } \
48 } \
49 } while(0)
53 * LZX_PLAIN_COMP_HASH_BITS determines how big the hash table for finding
54 * matches will be.
56 * The window in which we look for matches is 8192 bytes. That means with
57 * random data a value of 13 is getting close to no collisions, while a 12
58 * will miss about half the possible matches. With compressible data there
59 * will generally be fewer and less diverse entries, so collisions are rarer.
61 * In the testsuite, bith 12 and 13 give better compression than Windows, but
62 * 12 is faster. 11 does not save time and costs accuracy. Thus we prefer 12.
64 #define LZX_PLAIN_COMP_HASH_BITS 12
66 * LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
67 * circular hash table for a match, before we give up. A bigger number will
68 * generally lead to better but slower compression, but a stupidly big number
69 * will just be worse.
71 #define LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS 5
72 #define HASH_MASK ((1 << LZX_PLAIN_COMP_HASH_BITS) - 1)
74 static inline uint16_t three_byte_hash(const uint8_t *bytes)
76 uint16_t a = bytes[0];
77 uint16_t b = bytes[1] ^ 0x2e;
78 uint16_t c = bytes[2] ^ 0x55;
79 uint16_t ca = c - a;
80 uint16_t d = ((a + b) << 8) ^ (ca << 5) ^ (c + b) ^ (0xcab + a);
81 return d & HASH_MASK;
85 static inline void store_match(uint32_t *hash_table,
86 uint16_t h,
87 uint32_t offset)
89 int i;
90 uint32_t o = hash_table[h];
91 uint16_t h2;
92 uint16_t worst_h;
93 int worst_score;
95 if (o >= offset) {
96 /* there is nothing there yet */
97 hash_table[h] = offset;
98 return;
100 for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
101 h2 = (h + i) & HASH_MASK;
102 if (hash_table[h2] >= offset) {
103 hash_table[h2] = offset;
104 return;
108 * There are no slots, but we really want to store this, so we'll kick
109 * out the one with the longest distance.
111 worst_h = h;
112 worst_score = offset - o;
113 for (i = 1; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
114 int score;
115 h2 = (h + i) & HASH_MASK;
116 o = hash_table[h2];
117 score = offset - o;
118 if (score > worst_score) {
119 worst_score = score;
120 worst_h = h2;
123 hash_table[worst_h] = offset;
127 struct match {
128 const uint8_t *there;
129 uint32_t length;
133 static inline struct match lookup_match(uint32_t *hash_table,
134 uint16_t h,
135 const uint8_t *data,
136 uint32_t offset,
137 size_t max_len)
139 int i;
140 uint32_t o;
141 uint16_t h2;
142 size_t len;
143 const uint8_t *there = NULL;
144 const uint8_t *here = data + offset;
145 struct match best = {0};
147 for (i = 0; i < LZX_PLAIN_COMP_HASH_SEARCH_ATTEMPTS; i++) {
148 h2 = (h + i) & HASH_MASK;
149 o = hash_table[h2];
150 if (o >= offset) {
152 * Either this is 0xffffffff, or something is really
153 * wrong.
155 * In setting this, we would never have stepped over
156 * an 0xffffffff, so we won't now.
158 break;
160 if (offset - o > 8192) {
161 /* Too far away to use */
162 continue;
164 there = data + o;
166 * When we already have a long match, we can try to avoid
167 * measuring out another long, but shorter match.
169 if (best.length > 1000 &&
170 there[best.length - 1] != best.there[best.length - 1]) {
171 continue;
174 for (len = 0;
175 len < max_len && here[len] == there[len];
176 len++) {
177 /* counting */
179 if (len > 2) {
180 if (len > best.length) {
181 best.length = len;
182 best.there = there;
186 return best;
189 struct write_context {
190 uint8_t *compressed;
191 uint32_t compressed_pos;
192 uint32_t max_compressed_size;
193 uint32_t indic;
194 uint32_t indic_bit;
195 uint32_t indic_pos;
196 uint32_t nibble_index;
200 #define CHECK_INPUT_BYTES(__needed) \
201 __CHECK_BYTES(uncompressed_size, uncompressed_pos, __needed)
202 #define CHECK_OUTPUT_BYTES(__needed) \
203 __CHECK_BYTES(wc->max_compressed_size, wc->compressed_pos, __needed)
206 static inline ssize_t push_indicator_bit(struct write_context *wc, uint32_t bit)
208 wc->indic = (wc->indic << 1) | bit;
209 wc->indic_bit += 1;
211 if (wc->indic_bit == 32) {
212 PUSH_LE_U32(wc->compressed, wc->indic_pos, wc->indic);
213 wc->indic_bit = 0;
214 CHECK_OUTPUT_BYTES(sizeof(uint32_t));
215 wc->indic_pos = wc->compressed_pos;
216 wc->compressed_pos += sizeof(uint32_t);
218 return wc->indic_pos;
222 static ssize_t encode_match(struct write_context *wc,
223 struct match match,
224 const uint8_t *here)
226 uint32_t match_len = match.length - 3;
227 uint32_t best_offset = here - match.there - 1;
228 uint16_t metadata;
230 if (best_offset > 8191) {
231 return -1;
234 CHECK_OUTPUT_BYTES(sizeof(uint16_t));
235 metadata = (uint16_t)((best_offset << 3) | MIN(match_len, 7));
236 PUSH_LE_U16(wc->compressed, wc->compressed_pos, metadata);
237 wc->compressed_pos += sizeof(uint16_t);
239 if (match_len >= 7) {
240 match_len -= 7;
242 if (wc->nibble_index == 0) {
243 wc->nibble_index = wc->compressed_pos;
245 CHECK_OUTPUT_BYTES(sizeof(uint8_t));
246 wc->compressed[wc->nibble_index] = MIN(match_len, 15);
247 wc->compressed_pos += sizeof(uint8_t);
248 } else {
249 wc->compressed[wc->nibble_index] |= MIN(match_len, 15) << 4;
250 wc->nibble_index = 0;
253 if (match_len >= 15) {
254 match_len -= 15;
256 CHECK_OUTPUT_BYTES(sizeof(uint8_t));
257 wc->compressed[wc->compressed_pos] = MIN(match_len, 255);
258 wc->compressed_pos += sizeof(uint8_t);
260 if (match_len >= 255) {
261 /* Additional match_len */
263 match_len += 7 + 15;
265 if (match_len < (1 << 16)) {
266 CHECK_OUTPUT_BYTES(sizeof(uint16_t));
267 PUSH_LE_U16(wc->compressed, wc->compressed_pos,
268 match_len);
269 wc->compressed_pos += sizeof(uint16_t);
270 } else {
271 CHECK_OUTPUT_BYTES(sizeof(uint16_t) +
272 sizeof(uint32_t));
273 PUSH_LE_U16(wc->compressed,
274 wc->compressed_pos, 0);
275 wc->compressed_pos += sizeof(uint16_t);
277 PUSH_LE_U32(wc->compressed,
278 wc->compressed_pos,
279 match_len);
280 wc->compressed_pos += sizeof(uint32_t);
285 return push_indicator_bit(wc, 1);
288 #undef CHECK_OUTPUT_BYTES
289 #define CHECK_OUTPUT_BYTES(__needed) \
290 __CHECK_BYTES(wc.max_compressed_size, wc.compressed_pos, __needed)
293 ssize_t lzxpress_compress(const uint8_t *uncompressed,
294 uint32_t uncompressed_size,
295 uint8_t *compressed,
296 uint32_t max_compressed_size)
299 * This is the algorithm in [MS-XCA] 2.3 "Plain LZ77 Compression".
301 * It avoids Huffman encoding by including literal bytes inline when a
302 * match is not found. Every so often it includes a uint32 bit map
303 * flagging which positions contain matches and which contain
304 * literals. The encoding of matches is of variable size, depending on
305 * the match length; they are always at least 16 bits long, and can
306 * implicitly use unused half-bytes from earlier in the stream.
308 ssize_t ret;
309 uint32_t uncompressed_pos;
310 struct write_context wc = {
311 .indic = 0,
312 .indic_pos = 0,
313 .indic_bit = 0,
314 .nibble_index = 0,
315 .compressed = compressed,
316 .compressed_pos = 0,
317 .max_compressed_size = max_compressed_size
319 uint32_t hash_table[1 << LZX_PLAIN_COMP_HASH_BITS];
320 memset(hash_table, 0xff, sizeof(hash_table));
322 if (!uncompressed_size) {
323 return 0;
326 uncompressed_pos = 0;
327 CHECK_OUTPUT_BYTES(sizeof(uint32_t));
328 PUSH_LE_U32(wc.compressed, wc.compressed_pos, 0);
329 wc.compressed_pos += sizeof(uint32_t);
331 while ((uncompressed_pos < uncompressed_size) &&
332 (wc.compressed_pos < wc.max_compressed_size)) {
334 /* maximum len we can encode into metadata */
335 const uint32_t max_len = MIN(0xFFFF + 3,
336 uncompressed_size - uncompressed_pos);
337 const uint8_t *here = uncompressed + uncompressed_pos;
338 uint16_t h;
339 struct match match = {0};
341 if (max_len >= 3) {
342 h = three_byte_hash(here);
343 match = lookup_match(hash_table,
345 uncompressed,
346 uncompressed_pos,
347 max_len);
349 store_match(hash_table, h, uncompressed_pos);
350 } else {
351 match.there = NULL;
352 match.length = 0;
355 if (match.there == NULL) {
357 * This is going to be a literal byte, which we flag
358 * by setting a bit in an indicator field somewhere
359 * earlier in the stream.
361 CHECK_INPUT_BYTES(sizeof(uint8_t));
362 CHECK_OUTPUT_BYTES(sizeof(uint8_t));
363 wc.compressed[wc.compressed_pos++] = *here;
364 uncompressed_pos++;
366 ret = push_indicator_bit(&wc, 0);
367 if (ret < 0) {
368 return ret;
370 } else {
371 ret = encode_match(&wc, match, here);
372 if (ret < 0) {
373 return ret;
375 uncompressed_pos += match.length;
379 if (wc.indic_bit != 0) {
380 wc.indic <<= 32 - wc.indic_bit;
382 wc.indic |= UINT32_MAX >> wc.indic_bit;
383 PUSH_LE_U32(wc.compressed, wc.indic_pos, wc.indic);
385 return wc.compressed_pos;
388 ssize_t lzxpress_decompress(const uint8_t *input,
389 uint32_t input_size,
390 uint8_t *output,
391 uint32_t max_output_size)
394 * This is the algorithm in [MS-XCA] 2.4 "Plain LZ77 Decompression
395 * Algorithm Details".
397 uint32_t output_index, input_index;
398 uint32_t indicator, indicator_bit;
399 uint32_t nibble_index;
401 if (input_size == 0) {
402 return 0;
405 output_index = 0;
406 input_index = 0;
407 indicator = 0;
408 indicator_bit = 0;
409 nibble_index = 0;
411 #undef CHECK_INPUT_BYTES
412 #define CHECK_INPUT_BYTES(__needed) \
413 __CHECK_BYTES(input_size, input_index, __needed)
414 #undef CHECK_OUTPUT_BYTES
415 #define CHECK_OUTPUT_BYTES(__needed) \
416 __CHECK_BYTES(max_output_size, output_index, __needed)
418 do {
419 if (indicator_bit == 0) {
420 CHECK_INPUT_BYTES(sizeof(uint32_t));
421 indicator = PULL_LE_U32(input, input_index);
422 input_index += sizeof(uint32_t);
423 if (input_index == input_size) {
425 * The compressor left room for indicator
426 * flags for data that doesn't exist.
428 break;
430 indicator_bit = 32;
432 indicator_bit--;
435 * check whether the bit specified by indicator_bit is set or not
436 * set in indicator. For example, if indicator_bit has value 4
437 * check whether the 4th bit of the value in indicator is set
439 if (((indicator >> indicator_bit) & 1) == 0) {
440 CHECK_INPUT_BYTES(sizeof(uint8_t));
441 CHECK_OUTPUT_BYTES(sizeof(uint8_t));
442 output[output_index] = input[input_index];
443 input_index += sizeof(uint8_t);
444 output_index += sizeof(uint8_t);
445 } else {
446 uint32_t length;
447 uint32_t offset;
449 CHECK_INPUT_BYTES(sizeof(uint16_t));
450 length = PULL_LE_U16(input, input_index);
451 input_index += sizeof(uint16_t);
452 offset = (length >> 3) + 1;
453 length &= 7;
455 if (length == 7) {
456 if (nibble_index == 0) {
457 CHECK_INPUT_BYTES(sizeof(uint8_t));
458 nibble_index = input_index;
459 length = input[input_index] & 0xf;
460 input_index += sizeof(uint8_t);
461 } else {
462 length = input[nibble_index] >> 4;
463 nibble_index = 0;
466 if (length == 15) {
467 CHECK_INPUT_BYTES(sizeof(uint8_t));
468 length = input[input_index];
469 input_index += sizeof(uint8_t);
470 if (length == 255) {
471 CHECK_INPUT_BYTES(sizeof(uint16_t));
472 length = PULL_LE_U16(input, input_index);
473 input_index += sizeof(uint16_t);
474 if (length == 0) {
475 CHECK_INPUT_BYTES(sizeof(uint32_t));
476 length = PULL_LE_U32(input, input_index);
477 input_index += sizeof(uint32_t);
480 if (length < (15 + 7)) {
481 return -1;
483 length -= (15 + 7);
485 length += 15;
487 length += 7;
489 length += 3;
491 if (length == 0) {
492 return -1;
495 for (; length > 0; --length) {
496 if (offset > output_index) {
497 return -1;
499 CHECK_OUTPUT_BYTES(sizeof(uint8_t));
500 output[output_index] = output[output_index - offset];
501 output_index += sizeof(uint8_t);
504 } while ((output_index < max_output_size) && (input_index < (input_size)));
506 return output_index;