Improved and Pythonized helenos-pkg-config
[helenos.git] / uspace / lib / compress / inflate.c
blobfd5360dd022a75095bb398416296ea77da61b241
1 /*
2 * Copyright (c) 2010 Mark Adler
3 * Copyright (c) 2010 Martin Decky
4 * All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @file
31 * @brief Implementation of inflate decompression
33 * A simple inflate implementation (decompression of `deflate' stream as
34 * described by RFC 1951) based on puff.c by Mark Adler. This code is
35 * currently not optimized for speed but for readability.
37 * All dynamically allocated memory memory is taken from the stack. The
38 * stack usage should be typically bounded by 2 KB.
40 * Original copyright notice:
42 * Copyright (C) 2002-2010 Mark Adler, all rights reserved
43 * version 2.1, 4 Apr 2010
45 * This software is provided 'as-is', without any express or implied
46 * warranty. In no event will the author be held liable for any damages
47 * arising from the use of this software.
49 * Permission is granted to anyone to use this software for any purpose,
50 * including commercial applications, and to alter it and redistribute it
51 * freely, subject to the following restrictions:
53 * 1. The origin of this software must not be misrepresented; you must not
54 * claim that you wrote the original software. If you use this software
55 * in a product, an acknowledgment in the product documentation would be
56 * appreciated but is not required.
57 * 2. Altered source versions must be plainly marked as such, and must not
58 * be misrepresented as being the original software.
59 * 3. This notice may not be removed or altered from any source
60 * distribution.
62 * Mark Adler <madler@alumni.caltech.edu>
66 #include <stddef.h>
67 #include <stdint.h>
68 #include <stdbool.h>
69 #include <errno.h>
70 #include <mem.h>
71 #include "inflate.h"
73 /** Maximum bits in the Huffman code */
74 #define MAX_HUFFMAN_BIT 15
76 /** Number of length codes */
77 #define MAX_LEN 29
78 /** Number of distance codes */
79 #define MAX_DIST 30
80 /** Number of order codes */
81 #define MAX_ORDER 19
82 /** Number of literal/length codes */
83 #define MAX_LITLEN 286
84 /** Number of fixed literal/length codes */
85 #define MAX_FIXED_LITLEN 288
87 /** Number of all codes */
88 #define MAX_CODE (MAX_LITLEN + MAX_DIST)
90 /** Check for input buffer overrun condition */
91 #define CHECK_OVERRUN(state) \
92 do { \
93 if ((state).overrun) \
94 return ELIMIT; \
95 } while (false)
97 /** Inflate algorithm state
100 typedef struct {
101 uint8_t *dest; /**< Output buffer */
102 size_t destlen; /**< Output buffer size */
103 size_t destcnt; /**< Position in the output buffer */
105 uint8_t *src; /**< Input buffer */
106 size_t srclen; /**< Input buffer size */
107 size_t srccnt; /**< Position in the input buffer */
109 uint16_t bitbuf; /**< Bit buffer */
110 size_t bitlen; /**< Number of bits in the bit buffer */
112 bool overrun; /**< Overrun condition */
113 } inflate_state_t;
115 /** Huffman code description
118 typedef struct {
119 uint16_t *count; /**< Array of symbol counts */
120 uint16_t *symbol; /**< Array of symbols */
121 } huffman_t;
123 /** Length codes
126 static const uint16_t lens[MAX_LEN] = {
127 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
128 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
131 /** Extended length codes
134 static const uint16_t lens_ext[MAX_LEN] = {
135 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
136 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
139 /** Distance codes
142 static const uint16_t dists[MAX_DIST] = {
143 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
144 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
145 8193, 12289, 16385, 24577
148 /** Extended distance codes
151 static const uint16_t dists_ext[MAX_DIST] = {
152 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
153 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
154 12, 12, 13, 13
157 /** Order codes
160 static const short order[MAX_ORDER] = {
161 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
164 /** Static length symbol counts
167 static uint16_t len_count[MAX_HUFFMAN_BIT + 1] = {
168 0, 0, 0, 0, 0, 0, 0, 24, 152, 112, 0, 0, 0, 0, 0, 0
171 /** Static length symbols
174 static uint16_t len_symbol[MAX_FIXED_LITLEN] = {
175 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268,
176 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 0, 1, 2,
177 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
178 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
179 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
180 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
181 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
182 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
183 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
184 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
185 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
186 140, 141, 142, 143, 280, 281, 282, 283, 284, 285, 286, 287, 144,
187 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
188 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
189 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
190 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
191 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
192 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
193 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235,
194 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248,
195 249, 250, 251, 252, 253, 254, 255
198 /** Static distance symbol counts
201 static uint16_t dist_count[MAX_HUFFMAN_BIT + 1] = {
202 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
205 /** Static distance symbols
208 static uint16_t dist_symbol[MAX_DIST] = {
209 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
210 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
213 /** Huffman code for lengths
216 static huffman_t len_code = {
217 .count = len_count,
218 .symbol = len_symbol
221 /** Huffman code for distances
224 static huffman_t dist_code = {
225 .count = dist_count,
226 .symbol = dist_symbol
229 /** Get bits from the bit buffer
231 * @param state Inflate state.
232 * @param cnt Number of bits to return (at most 16).
234 * @return Returned bits.
237 static inline uint16_t get_bits(inflate_state_t *state, size_t cnt)
239 /* Bit accumulator for at least 20 bits */
240 uint32_t val = state->bitbuf;
242 while (state->bitlen < cnt) {
243 if (state->srccnt == state->srclen) {
244 state->overrun = true;
245 return 0;
248 /* Load 8 more bits */
249 val |= ((uint32_t) state->src[state->srccnt]) << state->bitlen;
250 state->srccnt++;
251 state->bitlen += 8;
254 /* Update bits in the buffer */
255 state->bitbuf = (uint16_t) (val >> cnt);
256 state->bitlen -= cnt;
258 return ((uint16_t) (val & ((1 << cnt) - 1)));
261 /** Decode `stored' block
263 * @param state Inflate state.
265 * @return EOK on success.
266 * @return ELIMIT on input buffer overrun.
267 * @return ENOMEM on output buffer overrun.
268 * @return EINVAL on invalid data.
271 static errno_t inflate_stored(inflate_state_t *state)
273 /* Discard bits in the bit buffer */
274 state->bitbuf = 0;
275 state->bitlen = 0;
277 if (state->srccnt + 4 > state->srclen)
278 return ELIMIT;
280 uint16_t len =
281 state->src[state->srccnt] | (state->src[state->srccnt + 1] << 8);
282 uint16_t len_compl =
283 state->src[state->srccnt + 2] | (state->src[state->srccnt + 3] << 8);
285 /* Check block length and its complement */
286 if (((int16_t) len) != ~((int16_t) len_compl))
287 return EINVAL;
289 state->srccnt += 4;
291 /* Check input buffer size */
292 if (state->srccnt + len > state->srclen)
293 return ELIMIT;
295 /* Check output buffer size */
296 if (state->destcnt + len > state->destlen)
297 return ENOMEM;
299 /* Copy data */
300 memcpy(state->dest + state->destcnt, state->src + state->srccnt, len);
301 state->srccnt += len;
302 state->destcnt += len;
304 return EOK;
307 /** Decode a symbol using the Huffman code
309 * @param state Inflate state.
310 * @param huffman Huffman code.
311 * @param symbol Decoded symbol.
313 * @param EOK on success.
314 * @param EINVAL on invalid Huffman code.
317 static errno_t huffman_decode(inflate_state_t *state, huffman_t *huffman,
318 uint16_t *symbol)
320 /* Decode bits */
321 uint16_t code = 0;
323 /* First code of the given length */
324 size_t first = 0;
327 * Index of the first code of the given length
328 * in the symbol table
330 size_t index = 0;
332 /* Current number of bits in the code */
333 size_t len;
335 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
336 /* Get next bit */
337 code |= get_bits(state, 1);
338 CHECK_OVERRUN(*state);
340 uint16_t count = huffman->count[len];
341 if (code < first + count) {
342 /* Return decoded symbol */
343 *symbol = huffman->symbol[index + code - first];
344 return EOK;
347 /* Update for next length */
348 index += count;
349 first += count;
350 first <<= 1;
351 code <<= 1;
354 return EINVAL;
357 /** Construct Huffman tables from canonical Huffman code
359 * @param huffman Constructed Huffman tables.
360 * @param length Lengths of the canonical Huffman code.
361 * @param n Number of lengths.
363 * @return 0 if the Huffman code set is complete.
364 * @return Negative value for an over-subscribed code set.
365 * @return Positive value for an incomplete code set.
368 static int16_t huffman_construct(huffman_t *huffman, uint16_t *length, size_t n)
370 /* Count number of codes for each length */
371 size_t len;
372 for (len = 0; len <= MAX_HUFFMAN_BIT; len++)
373 huffman->count[len] = 0;
375 /* We assume that the lengths are within bounds */
376 size_t symbol;
377 for (symbol = 0; symbol < n; symbol++)
378 huffman->count[length[symbol]]++;
380 if (huffman->count[0] == n) {
381 /* The code is complete, but decoding will fail */
382 return 0;
385 /* Check for an over-subscribed or incomplete set of lengths */
386 int16_t left = 1;
387 for (len = 1; len <= MAX_HUFFMAN_BIT; len++) {
388 left <<= 1;
389 left -= huffman->count[len];
390 if (left < 0) {
391 /* Over-subscribed */
392 return left;
396 /* Generate offsets into symbol table */
397 uint16_t offs[MAX_HUFFMAN_BIT + 1];
399 offs[1] = 0;
400 for (len = 1; len < MAX_HUFFMAN_BIT; len++)
401 offs[len + 1] = offs[len] + huffman->count[len];
403 for (symbol = 0; symbol < n; symbol++) {
404 if (length[symbol] != 0) {
405 huffman->symbol[offs[length[symbol]]] = symbol;
406 offs[length[symbol]]++;
410 return left;
413 /** Decode literal/length and distance codes
415 * Decode until end-of-block code.
417 * @param state Inflate state.
418 * @param len_code Huffman code for literal/length.
419 * @param dist_code Huffman code for distance.
421 * @return EOK on success.
422 * @return ENOENT on distance too large.
423 * @return EINVAL on invalid Huffman code.
424 * @return ELIMIT on input buffer overrun.
425 * @return ENOMEM on output buffer overrun.
428 static errno_t inflate_codes(inflate_state_t *state, huffman_t *len_code,
429 huffman_t *dist_code)
431 uint16_t symbol;
433 do {
434 errno_t err = huffman_decode(state, len_code, &symbol);
435 if (err != EOK) {
436 /* Error decoding */
437 return err;
440 if (symbol < 256) {
441 /* Write out literal */
442 if (state->destcnt == state->destlen)
443 return ENOMEM;
445 state->dest[state->destcnt] = (uint8_t) symbol;
446 state->destcnt++;
447 } else if (symbol > 256) {
448 /* Compute length */
449 symbol -= 257;
450 if (symbol >= 29)
451 return EINVAL;
453 size_t len = lens[symbol] + get_bits(state, lens_ext[symbol]);
454 CHECK_OVERRUN(*state);
456 /* Get distance */
457 err = huffman_decode(state, dist_code, &symbol);
458 if (err != EOK)
459 return err;
461 size_t dist = dists[symbol] + get_bits(state, dists_ext[symbol]);
462 if (dist > state->destcnt)
463 return ENOENT;
465 if (state->destcnt + len > state->destlen)
466 return ENOMEM;
468 while (len > 0) {
469 /* Copy len bytes from distance bytes back */
470 state->dest[state->destcnt] =
471 state->dest[state->destcnt - dist];
472 state->destcnt++;
473 len--;
476 } while (symbol != 256);
478 return EOK;
481 /** Decode `fixed codes' block
483 * @param state Inflate state.
484 * @param len_code Huffman code for literal/length.
485 * @param dist_code Huffman code for distance.
487 * @return EOK on success.
488 * @return ENOENT on distance too large.
489 * @return EINVAL on invalid Huffman code.
490 * @return ELIMIT on input buffer overrun.
491 * @return ENOMEM on output buffer overrun.
494 static errno_t inflate_fixed(inflate_state_t *state, huffman_t *len_code,
495 huffman_t *dist_code)
497 return inflate_codes(state, len_code, dist_code);
500 /** Decode `dynamic codes' block
502 * @param state Inflate state.
504 * @return EOK on success.
505 * @return ENOENT on distance too large.
506 * @return EINVAL on invalid Huffman code.
507 * @return ELIMIT on input buffer overrun.
508 * @return ENOMEM on output buffer overrun.
511 static errno_t inflate_dynamic(inflate_state_t *state)
513 uint16_t length[MAX_CODE];
514 uint16_t dyn_len_count[MAX_HUFFMAN_BIT + 1];
515 uint16_t dyn_len_symbol[MAX_LITLEN];
516 uint16_t dyn_dist_count[MAX_HUFFMAN_BIT + 1];
517 uint16_t dyn_dist_symbol[MAX_DIST];
518 huffman_t dyn_len_code;
519 huffman_t dyn_dist_code;
521 dyn_len_code.count = dyn_len_count;
522 dyn_len_code.symbol = dyn_len_symbol;
524 dyn_dist_code.count = dyn_dist_count;
525 dyn_dist_code.symbol = dyn_dist_symbol;
527 /* Get number of bits in each table */
528 uint16_t nlen = get_bits(state, 5) + 257;
529 CHECK_OVERRUN(*state);
531 uint16_t ndist = get_bits(state, 5) + 1;
532 CHECK_OVERRUN(*state);
534 uint16_t ncode = get_bits(state, 4) + 4;
535 CHECK_OVERRUN(*state);
537 if ((nlen > MAX_LITLEN) || (ndist > MAX_DIST) ||
538 (ncode > MAX_ORDER))
539 return EINVAL;
541 /* Read code length code lengths */
542 uint16_t index;
543 for (index = 0; index < ncode; index++) {
544 length[order[index]] = get_bits(state, 3);
545 CHECK_OVERRUN(*state);
548 /* Set missing lengths to zero */
549 for (index = ncode; index < MAX_ORDER; index++)
550 length[order[index]] = 0;
552 /* Build Huffman code */
553 int16_t rc = huffman_construct(&dyn_len_code, length, MAX_ORDER);
554 if (rc != 0)
555 return EINVAL;
557 /* Read length/literal and distance code length tables */
558 index = 0;
559 while (index < nlen + ndist) {
560 uint16_t symbol;
561 errno_t err = huffman_decode(state, &dyn_len_code, &symbol);
562 if (err != EOK)
563 return EOK;
565 if (symbol < 16) {
566 length[index] = symbol;
567 index++;
568 } else {
569 uint16_t len = 0;
571 if (symbol == 16) {
572 if (index == 0)
573 return EINVAL;
575 len = length[index - 1];
576 symbol = get_bits(state, 2) + 3;
577 CHECK_OVERRUN(*state);
578 } else if (symbol == 17) {
579 symbol = get_bits(state, 3) + 3;
580 CHECK_OVERRUN(*state);
581 } else {
582 symbol = get_bits(state, 7) + 11;
583 CHECK_OVERRUN(*state);
586 if (index + symbol > nlen + ndist)
587 return EINVAL;
589 while (symbol > 0) {
590 length[index] = len;
591 index++;
592 symbol--;
597 /* Check for end-of-block code */
598 if (length[256] == 0)
599 return EINVAL;
601 /* Build Huffman tables for literal/length codes */
602 rc = huffman_construct(&dyn_len_code, length, nlen);
603 if ((rc < 0) || ((rc > 0) && (dyn_len_code.count[0] + 1 != nlen)))
604 return EINVAL;
606 /* Build Huffman tables for distance codes */
607 rc = huffman_construct(&dyn_dist_code, length + nlen, ndist);
608 if ((rc < 0) || ((rc > 0) && (dyn_dist_code.count[0] + 1 != ndist)))
609 return EINVAL;
611 return inflate_codes(state, &dyn_len_code, &dyn_dist_code);
614 /** Inflate data
616 * @param src Source data buffer.
617 * @param srclen Source buffer size (bytes).
618 * @param dest Destination data buffer.
619 * @param destlen Destination buffer size (bytes).
621 * @return EOK on success.
622 * @return ENOENT on distance too large.
623 * @return EINVAL on invalid Huffman code or invalid deflate data.
624 * @return ELIMIT on input buffer overrun.
625 * @return ENOMEM on output buffer overrun.
628 errno_t inflate(void *src, size_t srclen, void *dest, size_t destlen)
630 /* Initialize the state */
631 inflate_state_t state;
633 state.dest = (uint8_t *) dest;
634 state.destlen = destlen;
635 state.destcnt = 0;
637 state.src = (uint8_t *) src;
638 state.srclen = srclen;
639 state.srccnt = 0;
641 state.bitbuf = 0;
642 state.bitlen = 0;
644 state.overrun = false;
646 uint16_t last;
647 errno_t ret = EOK;
649 do {
650 /* Last block is indicated by a non-zero bit */
651 last = get_bits(&state, 1);
652 CHECK_OVERRUN(state);
654 /* Block type */
655 uint16_t type = get_bits(&state, 2);
656 CHECK_OVERRUN(state);
658 switch (type) {
659 case 0:
660 ret = inflate_stored(&state);
661 break;
662 case 1:
663 ret = inflate_fixed(&state, &len_code, &dist_code);
664 break;
665 case 2:
666 ret = inflate_dynamic(&state);
667 break;
668 default:
669 ret = EINVAL;
671 } while ((!last) && (ret == 0));
673 return ret;