ldb: make test output more readable
[Samba/gebeck_regimport.git] / lib / compression / lzxpress.c
bloba4ded7e45554d31ec991cac30d7c1493f02ef37a
1 /*
2 * Copyright (C) Matthieu Suiche 2008
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the author nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
35 #include "replace.h"
36 #include "lzxpress.h"
37 #include "../lib/util/byteorder.h"
/*
 * Helpers for reading little-endian integers out of a byte buffer one
 * byte at a time, so the result does not depend on host endianness or on
 * the alignment of the buffer.
 *
 * buf may be any pointer expression; it is parenthesised before the cast
 * so that an argument such as (p + 1) is evaluated in its own pointer
 * type first and only then viewed as bytes. ofs is a byte offset.
 */
#define __BUF_POS_CONST(buf,ofs) (((const uint8_t *)(buf))+(ofs))
#define __PULL_BYTE(buf,ofs) \
	((uint8_t)((*__BUF_POS_CONST(buf,ofs)) & 0xFF))

/* Read the 16-bit little-endian value stored at buf + ofs. */
#ifndef PULL_LE_UINT16
#define PULL_LE_UINT16(buf,ofs) ((uint16_t)( \
	((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+0))) << 0)) | \
	((uint16_t)(((uint16_t)(__PULL_BYTE(buf,(ofs)+1))) << 8)) \
))
#endif

/* Read the 32-bit little-endian value stored at buf + ofs. */
#ifndef PULL_LE_UINT32
#define PULL_LE_UINT32(buf,ofs) ((uint32_t)( \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+0))) <<  0)) | \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+1))) <<  8)) | \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+2))) << 16)) | \
	((uint32_t)(((uint32_t)(__PULL_BYTE(buf,(ofs)+3))) << 24)) \
))
#endif
/* Store v at p[0..1] in little-endian byte order. */
static void lzxpress_put_le16(uint8_t *p, uint16_t v)
{
	p[0] = (uint8_t)(v & 0xFF);
	p[1] = (uint8_t)((v >> 8) & 0xFF);
}

/* Store v at p[0..3] in little-endian byte order. */
static void lzxpress_put_le32(uint8_t *p, uint32_t v)
{
	p[0] = (uint8_t)(v & 0xFF);
	p[1] = (uint8_t)((v >> 8) & 0xFF);
	p[2] = (uint8_t)((v >> 16) & 0xFF);
	p[3] = (uint8_t)((v >> 24) & 0xFF);
}

/*
 * Compress a buffer into the layout that lzxpress_decompress() reads.
 *
 * Output layout, as produced below:
 *  - The stream is a sequence of symbols, each either a literal byte or a
 *    back-reference ("match") into the already-seen input.
 *  - Every group of 32 symbols is preceded by a 32-bit little-endian flag
 *    word. Flags are assigned from the most significant bit downwards;
 *    a 0 bit marks a literal, a 1 bit marks a match.
 *  - A match starts with a 16-bit little-endian word holding
 *    ((offset - 1) << 3) | code. For lengths 3..9 code is (length - 3).
 *    For longer matches code is 7 and (length - 10), capped at 15, is
 *    stored in a 4-bit nibble; two consecutive long matches share one
 *    byte (low nibble first, then high nibble). If the nibble is 15 an
 *    extra byte holding (length - 25) follows, and for the maximum length
 *    of 280 that byte is 255 followed by a 16-bit little-endian
 *    (length - 3).
 *  - If the last flag word is only partly used, four zero bytes are
 *    appended after the data.
 *
 * @param uncompressed        input bytes
 * @param uncompressed_size   number of input bytes
 * @param compressed          destination buffer
 * @param max_compressed_size capacity of the destination buffer
 * @return number of bytes written to compressed, or 0 when
 *         uncompressed_size is 0
 *
 * NOTE(review): max_compressed_size is not consulted, exactly as before;
 * the caller must supply a buffer large enough for the worst case.
 * NOTE(review): no upper limit on uncompressed_size is enforced here;
 * callers presumably pass at most one XPRESS block - confirm.
 */
ssize_t lzxpress_compress(const uint8_t *uncompressed,
			  uint32_t uncompressed_size,
			  uint8_t *compressed,
			  uint32_t max_compressed_size)
{
	uint32_t uncompressed_pos = 0;
	uint32_t compressed_pos;
	uint32_t byte_left = uncompressed_size;
	uint32_t indic = 0;		/* flag word being assembled */
	uint32_t indic_bit = 0;		/* number of symbols emitted so far */
	uint32_t nibble_index = 0;	/* byte with a free high nibble, 0 if none */
	uint8_t *indic_pos;		/* where the current flag word will be stored */

	(void)max_compressed_size;

	if (uncompressed_size == 0) {
		return 0;
	}

	/* Reserve (and zero) the first flag word. */
	indic_pos = &compressed[0];
	lzxpress_put_le32(indic_pos, 0);
	compressed_pos = (uint32_t)sizeof(uint32_t);

	do {
		const uint8_t *str1 = &uncompressed[uncompressed_pos];
		/* the window reaches back at most 0x1FFF bytes */
		uint32_t max_offset = (uncompressed_pos < 0x1FFF) ? uncompressed_pos : 0x1FFF;
		/* maximum len we can encode into metadata */
		uint32_t max_len = (byte_left < (255 + 15 + 7 + 3)) ? byte_left : (255 + 15 + 7 + 3);
		uint32_t best_len = 2;
		uint32_t best_offset = 0;	/* stays 0 when no match of 3+ bytes exists */
		uint32_t offset;

		/* search for the longest match in the window for the lookahead buffer */
		for (offset = 1; offset <= max_offset; offset++) {
			const uint8_t *str2 = str1 - offset;
			uint32_t len = 0;

			while ((len < max_len) && (str1[len] == str2[len])) {
				len++;
			}

			/* strictly longer only, so the nearest offset wins ties */
			if (len > best_len) {
				best_len = len;
				best_offset = offset;
			}
		}

		if (best_offset != 0) {
			uint32_t metadata_size = (uint32_t)sizeof(uint16_t);

			if (best_len < 10) {
				/* Classical meta-data: length fits in the 3 low bits */
				lzxpress_put_le16(&compressed[compressed_pos],
						  (uint16_t)(((best_offset - 1) << 3) | (best_len - 3)));
			} else {
				uint32_t nibble = (best_len < (15 + 7 + 3)) ? (best_len - (3 + 7)) : 15;

				lzxpress_put_le16(&compressed[compressed_pos],
						  (uint16_t)(((best_offset - 1) << 3) | 7));

				/* Shared byte */
				if (nibble_index == 0) {
					/* start a new byte; its high nibble stays 0 for now */
					compressed[compressed_pos + metadata_size] = (uint8_t)(nibble & 0xF);
					metadata_size += (uint32_t)sizeof(uint8_t);
				} else {
					/* fill the high nibble left free by the previous long match */
					compressed[nibble_index] &= 0xF;
					compressed[nibble_index] |= (uint8_t)(nibble << 4);
				}

				if (best_len >= (15 + 7 + 3)) {
					if (best_len < (3 + 7 + 15 + 255)) {
						/* Additional best_len */
						compressed[compressed_pos + metadata_size] =
							(uint8_t)((best_len - (3 + 7 + 15)) & 0xFF);
						metadata_size += (uint32_t)sizeof(uint8_t);
					} else {
						/* escape byte, then the length as 16 bits */
						compressed[compressed_pos + metadata_size] = 255;
						metadata_size += (uint32_t)sizeof(uint8_t);

						lzxpress_put_le16(&compressed[compressed_pos + metadata_size],
								  (uint16_t)(best_len - 3));
						metadata_size += (uint32_t)sizeof(uint16_t);
					}
				}
			}

			/* flag this symbol as a match; unsigned so bit 31 is well defined */
			indic |= (uint32_t)1 << (31 - (indic_bit % 32));

			if (best_len > 9) {
				if (nibble_index == 0) {
					nibble_index = compressed_pos + (uint32_t)sizeof(uint16_t);
				} else {
					nibble_index = 0;
				}
			}

			compressed_pos += metadata_size;
			uncompressed_pos += best_len;
			byte_left -= best_len;
		} else {
			compressed[compressed_pos++] = uncompressed[uncompressed_pos++];
			byte_left--;
		}
		indic_bit++;

		if ((indic_bit % 32) == 0) {
			/* 32 symbols emitted: store the flags and reserve the next word */
			lzxpress_put_le32(indic_pos, indic);
			indic = 0;
			indic_pos = &compressed[compressed_pos];
			/* zero it so the output never contains uninitialised bytes */
			lzxpress_put_le32(indic_pos, 0);
			compressed_pos += (uint32_t)sizeof(uint32_t);
		}
	} while (byte_left > 3);

	/*
	 * Emit whatever is left as literals. This must be a pre-tested loop:
	 * the main loop may already have consumed the whole input, and a
	 * post-tested loop would then read one byte past its end.
	 */
	while (uncompressed_pos < uncompressed_size) {
		compressed[compressed_pos++] = uncompressed[uncompressed_pos++];
		indic_bit++;

		if ((indic_bit % 32) == 0) {
			lzxpress_put_le32(indic_pos, indic);
			indic = 0;
			indic_pos = &compressed[compressed_pos];
			lzxpress_put_le32(indic_pos, 0);
			compressed_pos += (uint32_t)sizeof(uint32_t);
		}
	}

	if ((indic_bit % 32) > 0) {
		/* partly used flag word: append four zero bytes, then store the flags */
		lzxpress_put_le32(&compressed[compressed_pos], 0);
		lzxpress_put_le32(indic_pos, indic);
		compressed_pos += (uint32_t)sizeof(uint32_t);
	}

	return compressed_pos;
}
/* Read a 16-bit little-endian value from p[0..1]. */
static uint32_t lzxpress_get_le16(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8);
}

/* Read a 32-bit little-endian value from p[0..3]. */
static uint32_t lzxpress_get_le32(const uint8_t *p)
{
	return (uint32_t)p[0] |
	       ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) |
	       ((uint32_t)p[3] << 24);
}

/*
 * Decompress a stream in the layout written by lzxpress_compress().
 *
 * The input is consumed as groups of up to 32 symbols, each group
 * preceded by a 32-bit little-endian flag word that is scanned from the
 * most significant bit downwards. A 0 flag copies one literal byte; a 1
 * flag reads a 16-bit word whose upper 13 bits are (offset - 1) and whose
 * lower 3 bits are (length - 3), with the value 7 escaping to a shared
 * 4-bit nibble, then to an extra byte, then to a 16-bit length.
 *
 * Decoding stops when the output buffer is full, when the input is
 * exhausted, or when the input ends in the middle of a symbol. A match
 * that refers to data before the start of the output is skipped, as
 * before.
 *
 * @param input            compressed bytes
 * @param input_size       number of compressed bytes
 * @param output           destination buffer
 * @param max_output_size  capacity of the destination buffer
 * @return number of bytes written to output (never more than
 *         max_output_size)
 */
ssize_t lzxpress_decompress(const uint8_t *input,
			    uint32_t input_size,
			    uint8_t *output,
			    uint32_t max_output_size)
{
	uint32_t output_index = 0;
	uint32_t input_index = 0;
	uint32_t indicator = 0;		/* current flag word */
	uint32_t indicator_bit = 0;	/* flags still unread in indicator */
	uint32_t nibble_index = 0;	/* input byte whose high nibble is pending, 0 if none */

	/*
	 * Invariant: input_index <= input_size, so the "bytes remaining"
	 * subtractions below cannot wrap.
	 */
	while ((output_index < max_output_size) && (input_index < input_size)) {
		uint32_t length;
		uint32_t offset;

		if (indicator_bit == 0) {
			if ((input_size - input_index) < sizeof(uint32_t)) {
				/* truncated flag word */
				return output_index;
			}
			indicator = lzxpress_get_le32(&input[input_index]);
			input_index += (uint32_t)sizeof(uint32_t);
			indicator_bit = 32;
		}
		indicator_bit--;

		/*
		 * check whether the bit specified by indicator_bit is set or not
		 * set in indicator. For example, if indicator_bit has value 4
		 * check whether the 4th bit of the value in indicator is set
		 */
		if (((indicator >> indicator_bit) & 1) == 0) {
			/* literal */
			if (input_index >= input_size) {
				return output_index;
			}
			output[output_index++] = input[input_index++];
			continue;
		}

		/* match: 13 bits of offset, 3 bits of length */
		if ((input_size - input_index) < sizeof(uint16_t)) {
			return output_index;
		}
		length = lzxpress_get_le16(&input[input_index]);
		input_index += (uint32_t)sizeof(uint16_t);
		offset = length / 8;
		length = length % 8;

		if (length == 7) {
			if (nibble_index == 0) {
				/* low nibble of a new shared byte */
				if (input_index >= input_size) {
					return output_index;
				}
				nibble_index = input_index;
				length = input[input_index] % 16;
				input_index += (uint32_t)sizeof(uint8_t);
			} else {
				/* high nibble of the byte remembered earlier */
				length = input[nibble_index] / 16;
				nibble_index = 0;
			}

			if (length == 15) {
				if (input_index >= input_size) {
					return output_index;
				}
				length = input[input_index];
				input_index += (uint32_t)sizeof(uint8_t);
				if (length == 255) {
					if ((input_size - input_index) < sizeof(uint16_t)) {
						return output_index;
					}
					length = lzxpress_get_le16(&input[input_index]);
					input_index += (uint32_t)sizeof(uint16_t);
					/* cancels the +15 and +7 below; unsigned wrap is intended */
					length -= (15 + 7);
				}
				length += 15;
			}
			length += 7;
		}
		length += 3;

		/* byte-wise copy: source and destination may overlap */
		do {
			if ((output_index >= max_output_size) || ((offset + 1) > output_index)) {
				break;
			}

			output[output_index] = output[output_index - offset - 1];

			output_index += (uint32_t)sizeof(uint8_t);
			length -= (uint32_t)sizeof(uint8_t);
		} while (length != 0);
	}

	return output_index;
}