2 * Samba compression library - LGPLv3
4 * Copyright © Catalyst IT 2022
6 * Written by Douglas Bagnall <douglas.bagnall@catalyst.net.nz>
7 * and Joseph Sutton <josephsutton@catalyst.net.nz>
9 * ** NOTE! The following LGPL license applies to this file.
10 * ** It does NOT imply that all of Samba is released under the LGPL
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 3 of the License, or (at your option) any later version.
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
29 #include "lzxpress_huffman.h"
30 #include "lib/util/stable_sort.h"
31 #include "lib/util/debug.h"
32 #include "lib/util/byteorder.h"
33 #include "lib/util/bytearray.h"
36 * DEBUG_NO_LZ77_MATCHES toggles the encoding of matches as matches. If it is
37 * false the potential match is written as a series of literals, which is a
38 * valid but usually inefficient encoding. This is useful for isolating a
39 * problem to either the LZ77 or the Huffman stage.
41 #ifndef DEBUG_NO_LZ77_MATCHES
42 #define DEBUG_NO_LZ77_MATCHES false
46 * DEBUG_HUFFMAN_TREE forces the drawing of ascii art huffman trees during
47 * compression and decompression.
49 * These trees will also be drawn at DEBUG level 10, but that doesn't work
52 #ifndef DEBUG_HUFFMAN_TREE
53 #define DEBUG_HUFFMAN_TREE false
56 #if DEBUG_HUFFMAN_TREE
57 #define DBG(...) fprintf(stderr, __VA_ARGS__)
59 #define DBG(...) DBG_INFO(__VA_ARGS__)
63 #define LZXPRESS_ERROR -1LL
66 * We won't encode a match length longer than MAX_MATCH_LENGTH.
68 * Reports are that Windows has a limit at 64M.
70 #define MAX_MATCH_LENGTH (64 * 1024 * 1024)
83 #if ! defined __has_builtin
84 #define __has_builtin(x) 0
88 * bitlen_nonzero_16() returns the bit number of the most significant bit, or
89 * put another way, the integer log base 2. Log(0) is undefined; the argument
96 * Probably this is handled by a compiler intrinsic function that maps to a
97 * dedicated machine instruction.
100 static inline int bitlen_nonzero_16(uint16_t x
)
102 #if __has_builtin(__builtin_clz)
104 /* __builtin_clz returns the number of leading zeros */
105 return (sizeof(unsigned int) * CHAR_BIT
) - 1
106 - __builtin_clz((unsigned int) x
);
121 struct lzxhuff_compressor_context
{
122 const uint8_t *input_bytes
;
125 size_t prev_block_pos
;
127 size_t available_size
;
131 static int compare_huffman_node_count(struct huffman_node
*a
,
132 struct huffman_node
*b
)
134 return a
->count
- b
->count
;
137 static int compare_huffman_node_depth(struct huffman_node
*a
,
138 struct huffman_node
*b
)
140 int c
= a
->depth
- b
->depth
;
144 return (int)a
->symbol
- (int)b
->symbol
;
148 #define HASH_MASK ((1 << LZX_HUFF_COMP_HASH_BITS) - 1)
150 static inline uint16_t three_byte_hash(const uint8_t *bytes
)
153 * MS-XCA says "three byte hash", but does not specify it.
155 * This one is just cobbled together, but has quite good distribution
156 * in the 12-14 bit forms, which is what we care about most.
157 * e.g: 13 bit: median 2048, min 2022, max 2074, stddev 6.0
159 uint16_t a
= bytes
[0];
160 uint16_t b
= bytes
[1] ^ 0x2e;
161 uint16_t c
= bytes
[2] ^ 0x55;
163 uint16_t d
= ((a
+ b
) << 8) ^ (ca
<< 5) ^ (c
+ b
) ^ (0xcab + a
);
164 return d
& HASH_MASK
;
168 static inline uint16_t encode_match(size_t len
, size_t offset
)
171 code
|= MIN(len
- 3, 15);
172 code
|= bitlen_nonzero_16(offset
) << 4;
177 * debug_huffman_tree() uses debug_huffman_tree_print() to draw the Huffman
180 * Note that the Huffman tree is probably not the same as that implied by the
181 * canonical Huffman encoding that is finally used. That tree would be the
182 * same shape, but with the left and right toggled to sort the branches by
183 * length, after which the symbols for each length sorted by value.
186 static void debug_huffman_tree_print(struct huffman_node
*node
,
187 int *trail
, int depth
)
189 if (node
->left
== NULL
) {
190 /* time to print a row */
192 bool branched
= false;
195 int s
= node
->symbol
;
199 " \033[1;31m Max depth exceeded! (%d)\033[0m "
200 " symbol %#3x claimed depth %d count %d\n",
201 depth
, node
->symbol
, node
->depth
, node
->count
);
204 for (j
= depth
- 1; j
>= 0; j
--) {
206 if (trail
[j
] == -1) {
211 } else if (trail
[j
] == -1) {
218 for (j
= 0; j
< depth
; j
++) {
222 fprintf(stderr
, " ");
226 fprintf(stderr
, " │ ");
230 fprintf(stderr
, " ╰─");
234 fprintf(stderr
, "%5d─┬─", row
[j
]);
240 snprintf(c
, sizeof(c
),
241 "\033[1;32m%02x\033[0m \033[1;33m%c%c%c\033[0m",
243 0xE2, 0x90, 0x80 + s
); /* utf-8 for symbol */
244 } else if (s
< 127) {
245 snprintf(c
, sizeof(c
),
246 "\033[1;32m%2x\033[0m '\033[10;32m%c\033[0m'",
248 } else if (s
< 256) {
249 snprintf(c
, sizeof(c
), "\033[1;32m%2x\033[0m", s
);
251 uint16_t len
= (s
& 15) + 3;
252 uint16_t dbits
= ((s
>> 4) & 15) + 1;
253 snprintf(c
, sizeof(c
),
254 " \033[0;33mlen:%2d%s, "
255 "dist:%d-%d \033[0m \033[1;32m%3x\033[0m%s",
257 len
== 18 ? "+" : "",
261 s
== 256 ? " \033[1;31mEOF\033[0m" : "");
265 fprintf(stderr
, "──%5d %s \033[2;37m%s\033[0m\n",
266 node
->count
, c
, code
);
269 trail
[depth
] = node
->count
;
270 debug_huffman_tree_print(node
->left
, trail
, depth
+ 1);
272 debug_huffman_tree_print(node
->right
, trail
, depth
+ 1);
277 * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree()
278 * will print a tree looking something like this:
280 * 7─┬─── 3 len:18+, dist:1-1 10f 0
281 * ╰─ 4─┬─ 2─┬─── 1 61 'a' 100
282 * │ ╰─── 1 62 'b' 101
283 * ╰─ 2─┬─── 1 63 'c' 110
284 * ╰─── 1 len: 3, dist:1-1 100 EOF 111
286 * This is based off a Huffman root node, and the tree may not be the same as
287 * the canonical tree.
289 static void debug_huffman_tree(struct huffman_node
*root
)
292 debug_huffman_tree_print(root
, trail
, 0);
297 * If DEBUG_HUFFMAN_TREE is defined true, debug_huffman_tree_from_table()
298 * will print something like this based on a decoding symbol table.
300 * Tree from decoding table 9 nodes → 5 codes
301 * 10000─┬─── 5000 len:18+, dist:1-1 10f 0
302 * ╰─ 5000─┬─ 2500─┬─── 1250 61 'a' 100
303 * │ ╰─── 1250 62 'b' 101
304 * ╰─ 2500─┬─── 1250 63 'c' 110
305 * ╰─── 1250 len: 3, dist:1-1 100 EOF 111
307 * This is the canonical form of the Huffman tree where the actual counts
308 * aren't known (we use "10000" to help indicate relative frequencies).
310 static void debug_huffman_tree_from_table(uint16_t *table
)
313 struct huffman_node nodes
[1024] = {{0}};
314 uint16_t codes
[1024];
318 nodes
[0].count
= 10000;
321 uint16_t index
= codes
[i
];
322 struct huffman_node
*node
= &nodes
[i
];
323 if (table
[index
] == 0xffff) {
329 node
->left
= nodes
+ n
;
330 nodes
[n
].count
= node
->count
>> 1;
335 node
->right
= nodes
+ n
;
336 nodes
[n
].count
= node
->count
>> 1;
340 node
->symbol
= table
[index
] & 511;
346 "\033[1;34m Tree from decoding table\033[0m "
347 "%zu nodes → %zu codes\n",
349 debug_huffman_tree_print(nodes
, trail
, 0);
353 static bool depth_walk(struct huffman_node
*n
, uint32_t depth
)
356 if (n
->left
== NULL
) {
357 /* this is a leaf, record the depth */
364 ok
= (depth_walk(n
->left
, depth
+ 1) &&
365 depth_walk(n
->right
, depth
+ 1));
371 static bool check_and_record_depths(struct huffman_node
*root
)
373 return depth_walk(root
, 0);
377 static bool encode_values(struct huffman_node
*leaves
,
379 uint16_t symbol_values
[512])
383 * See, we have a leading 1 in our internal code representation, which
384 * indicates the code length.
387 uint32_t code_len
= 0;
388 memset(symbol_values
, 0, sizeof(uint16_t) * 512);
389 for (i
= 0; i
< n_leaves
; i
++) {
390 code
<<= leaves
[i
].depth
- code_len
;
391 code_len
= leaves
[i
].depth
;
393 symbol_values
[leaves
[i
].symbol
] = code
;
397 * The last code should be 11111... with code_len + 1 ones. The final
398 * code++ will wrap this round to 1000... with code_len + 1 zeroes.
401 if (code
!= 2 << code_len
) {
408 static int generate_huffman_codes(struct huffman_node
*leaf_nodes
,
409 struct huffman_node
*internal_nodes
,
410 uint16_t symbol_values
[512])
412 size_t head_leaf
= 0;
413 size_t head_branch
= 0;
414 size_t tail_branch
= 0;
415 struct huffman_node
*huffman_root
= NULL
;
420 * Before we sort the nodes, we can eliminate the unused ones.
422 for (i
= 0; i
< 512; i
++) {
423 if (leaf_nodes
[i
].count
) {
424 leaf_nodes
[n_leaves
] = leaf_nodes
[i
];
429 return LZXPRESS_ERROR
;
433 * There is *almost* no way this should happen, and it would
434 * ruin the tree (because the shortest possible codes are 1
435 * bit long, and there are two of them).
437 * The only way to get here is in an internal block in a
438 * 3-or-more block message (i.e. > 128k), which consists
439 * entirely of a match starting in the previous block (if it
440 * was the end block, it would have the EOF symbol).
442 * What we do is add a dummy symbol which is this one XOR 256.
443 * It won't be used in the stream but will balance the tree.
445 leaf_nodes
[1] = leaf_nodes
[0];
446 leaf_nodes
[1].symbol
^= 0x100;
450 /* note, in sort we're using internal_nodes as auxillary space */
451 stable_sort(leaf_nodes
,
454 sizeof(struct huffman_node
),
455 (samba_compare_fn_t
)compare_huffman_node_count
);
458 * This outer loop is for re-quantizing the counts if the tree is too
459 * tall (>15), which we need to do because the final encoding can't
460 * express a tree that deep.
462 * In theory, this should be a 'while (true)' loop, but we chicken
463 * out with 10 iterations, just in case.
465 * In practice it will almost always resolve in the first round; if
466 * not then, in the second or third. Remember we'll looking at 64k or
467 * less, so the rarest we can have is 1 in 64k; each round of
468 * quantization effecively doubles its frequency to 1 in 32k, 1 in
469 * 16k, etc, until we're treating the rare symbol as actually quite
472 for (j
= 0; j
< 10; j
++) {
473 bool less_than_15_bits
;
475 struct huffman_node
*a
= NULL
;
476 struct huffman_node
*b
= NULL
;
477 size_t leaf_len
= n_leaves
- head_leaf
;
478 size_t internal_len
= tail_branch
- head_branch
;
480 if (leaf_len
+ internal_len
== 1) {
482 * We have the complete tree. The root will be
483 * an internal node unless there is just one
484 * symbol, which is already impossible.
486 if (unlikely(leaf_len
== 1)) {
487 return LZXPRESS_ERROR
;
490 &internal_nodes
[head_branch
];
495 * We know here we have at least two nodes, and we
496 * want to select the two lowest scoring ones. Those
497 * have to be either a) the head of each queue, or b)
498 * the first two nodes of either queue.
500 * The complicating factors are: a) we need to check
501 * the length of each queue, and b) in the case of
502 * ties, we prefer to pair leaves with leaves.
504 * Note a complication we don't have: the leaf node
505 * queue never grows, and the subtree queue starts
506 * empty and cannot grow beyond n - 1. It feeds on
507 * itself. We don't need to think about overflow.
510 /* two from subtrees */
511 a
= &internal_nodes
[head_branch
];
512 b
= &internal_nodes
[head_branch
+ 1];
514 } else if (internal_len
== 0) {
516 a
= &leaf_nodes
[head_leaf
];
517 b
= &leaf_nodes
[head_leaf
+ 1];
519 } else if (leaf_len
== 1 && internal_len
== 1) {
521 a
= &leaf_nodes
[head_leaf
];
522 b
= &internal_nodes
[head_branch
];
527 * Take the lowest head, twice, checking for
528 * length after taking the first one.
530 if (leaf_nodes
[head_leaf
].count
>
531 internal_nodes
[head_branch
].count
) {
532 a
= &internal_nodes
[head_branch
];
534 if (internal_len
== 1) {
535 b
= &leaf_nodes
[head_leaf
];
540 a
= &leaf_nodes
[head_leaf
];
543 b
= &internal_nodes
[head_branch
];
549 if (leaf_nodes
[head_leaf
].count
>
550 internal_nodes
[head_branch
].count
) {
551 b
= &internal_nodes
[head_branch
];
554 b
= &leaf_nodes
[head_leaf
];
560 * Now we add a new node to the subtrees list that
561 * combines the score of node_a and node_b, and points
562 * to them as children.
564 internal_nodes
[tail_branch
].count
= a
->count
+ b
->count
;
565 internal_nodes
[tail_branch
].left
= a
;
566 internal_nodes
[tail_branch
].right
= b
;
568 if (tail_branch
== n_leaves
) {
570 * We're not getting here, no way, never ever.
571 * Unless we made a terible mistake.
573 * That is, in a binary tree with n leaves,
574 * there are ALWAYS n-1 internal nodes.
576 return LZXPRESS_ERROR
;
579 if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE
) {
580 debug_huffman_tree(huffman_root
);
583 * We have a tree, and need to turn it into a lookup table,
584 * and see if it is shallow enough (<= 15).
586 less_than_15_bits
= check_and_record_depths(huffman_root
);
587 if (less_than_15_bits
) {
589 * Now the leaf nodes know how deep they are, and we
590 * no longer need the internal nodes.
592 * We need to sort the nodes of equal depth, so that
593 * they are sorted by depth first, and symbol value
594 * second. The internal_nodes can again be auxillary
601 sizeof(struct huffman_node
),
602 (samba_compare_fn_t
)compare_huffman_node_depth
);
604 encode_values(leaf_nodes
, n_leaves
, symbol_values
);
610 * requantize by halfing and rounding up, so that small counts
611 * become relatively bigger. This will lead to a flatter tree.
613 for (i
= 0; i
< n_leaves
; i
++) {
614 leaf_nodes
[i
].count
>>= 1;
615 leaf_nodes
[i
].count
+= 1;
621 return LZXPRESS_ERROR
;
625 * LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS is how far ahead to search in the
626 * circular hash table for a match, before we give up. A bigger number will
627 * generally lead to better but slower compression, but a stupidly big number
628 * will just be worse.
630 * If you're fiddling with this, consider also fiddling with
631 * LZX_HUFF_COMP_HASH_BITS.
633 #define LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS 5
635 static inline void store_match(uint16_t *hash_table
,
640 uint16_t o
= hash_table
[h
];
646 /* there is nothing there yet */
647 hash_table
[h
] = offset
;
650 for (i
= 1; i
< LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS
; i
++) {
651 h2
= (h
+ i
) & HASH_MASK
;
652 if (hash_table
[h2
] == 0xffff) {
653 hash_table
[h2
] = offset
;
658 * There are no slots, but we really want to store this, so we'll kick
659 * out the one with the longest distance.
662 worst_score
= offset
- o
;
663 for (i
= 1; i
< LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS
; i
++) {
665 h2
= (h
+ i
) & HASH_MASK
;
668 if (score
> worst_score
) {
673 hash_table
[worst_h
] = offset
;
678 * Yes, struct match looks a lot like a DATA_BLOB.
681 const uint8_t *there
;
686 static inline struct match
lookup_match(uint16_t *hash_table
,
693 uint16_t o
= hash_table
[h
];
696 const uint8_t *there
= NULL
;
697 struct match best
= {0};
699 for (i
= 0; i
< LZX_HUFF_COMP_HASH_SEARCH_ATTEMPTS
; i
++) {
700 h2
= (h
+ i
) & HASH_MASK
;
704 * in setting this, we would never have stepped over
705 * an 0xffff, so we won't now.
710 if (here
- there
> 65534 || there
> here
) {
715 * When we already have a long match, we can try to avoid
716 * measuring out another long, but shorter match.
718 if (best
.length
> 1000 &&
719 there
[best
.length
- 1] != best
.there
[best
.length
- 1]) {
724 len
< max_len
&& here
[len
] == there
[len
];
730 * As a tiebreaker, we prefer the closer match which
731 * is likely to encode smaller (and certainly no worse).
733 if (len
> best
.length
||
734 (len
== best
.length
&& there
> best
.there
)) {
745 static ssize_t
lz77_encode_block(struct lzxhuff_compressor_context
*cmp_ctx
,
746 struct lzxhuff_compressor_mem
*cmp_mem
,
747 uint16_t *hash_table
,
748 uint16_t *prev_hash_table
)
750 uint16_t *intermediate
= cmp_mem
->intermediate
;
751 struct huffman_node
*leaf_nodes
= cmp_mem
->leaf_nodes
;
752 uint16_t *symbol_values
= cmp_mem
->symbol_values
;
753 size_t i
, j
, intermediate_len
;
754 const uint8_t *data
= cmp_ctx
->input_bytes
+ cmp_ctx
->input_pos
;
755 const uint8_t *prev_block
= NULL
;
756 size_t remaining_size
= cmp_ctx
->input_size
- cmp_ctx
->input_pos
;
757 size_t block_end
= MIN(65536, remaining_size
);
761 if (cmp_ctx
->input_size
< cmp_ctx
->input_pos
) {
762 return LZXPRESS_ERROR
;
765 if (cmp_ctx
->prev_block_pos
!= cmp_ctx
->input_pos
) {
766 prev_block
= cmp_ctx
->input_bytes
+ cmp_ctx
->prev_block_pos
;
767 } else if (prev_hash_table
!= NULL
) {
768 /* we've got confused! hash and block should go together */
769 return LZXPRESS_ERROR
;
773 * leaf_nodes is used to count the symbols seen, for later Huffman
776 for (i
= 0; i
< 512; i
++) {
777 leaf_nodes
[i
] = (struct huffman_node
) {
784 if (remaining_size
< 41 || DEBUG_NO_LZ77_MATCHES
) {
786 * There is no point doing a hash table and looking for
787 * matches in this tiny block (remembering we are committed to
788 * using 32 bits, so there's a good chance we wouldn't even
789 * save a byte). The threshold of 41 matches Windows.
790 * If remaining_size < 3, we *can't* do the hash.
795 * We use 0xffff as the unset value for table, because it is
796 * not a valid match offset (and 0x0 is).
798 memset(hash_table
, 0xff, sizeof(cmp_mem
->hash_table1
));
800 for (i
= 0; i
<= block_end
- 3; i
++) {
802 const uint8_t *here
= data
+ i
;
803 uint16_t h
= three_byte_hash(here
);
804 size_t max_len
= MIN(remaining_size
- i
, MAX_MATCH_LENGTH
);
805 match
= lookup_match(hash_table
,
811 if (match
.there
== NULL
&& prev_hash_table
!= NULL
) {
813 * If this is not the first block,
814 * backreferences can look into the previous
815 * block (but only as far as 65535 bytes, so
816 * the end of this block cannot see the start
819 match
= lookup_match(prev_hash_table
,
826 store_match(hash_table
, h
, i
);
828 if (match
.there
== NULL
) {
829 /* add a literal and move on. */
831 leaf_nodes
[c
].count
++;
838 if (match
.length
<= 65538) {
839 intermediate
[j
] = 0xffff;
840 intermediate
[j
+ 1] = match
.length
- 3;
841 intermediate
[j
+ 2] = here
- match
.there
;
844 size_t m
= match
.length
- 3;
845 intermediate
[j
] = 0xfffe;
846 intermediate
[j
+ 1] = m
& 0xffff;
847 intermediate
[j
+ 2] = m
>> 16;
848 intermediate
[j
+ 3] = here
- match
.there
;
851 code
= encode_match(match
.length
, here
- match
.there
);
852 leaf_nodes
[code
].count
++;
853 i
+= match
.length
- 1; /* `- 1` for the loop i++ */
855 * A match can take us past the intended block length,
856 * extending the block. We don't need to do anything
857 * special for this case -- the loops will naturally
858 * do the right thing.
864 * There might be some bytes at the end.
866 for (; i
< block_end
; i
++) {
867 leaf_nodes
[data
[i
]].count
++;
868 intermediate
[j
] = data
[i
];
872 if (i
== remaining_size
) {
873 /* add a trailing EOF marker (256) */
874 intermediate
[j
] = 0xffff;
875 intermediate
[j
+ 1] = 0;
876 intermediate
[j
+ 2] = 1;
878 leaf_nodes
[256].count
++;
881 intermediate_len
= j
;
883 cmp_ctx
->prev_block_pos
= cmp_ctx
->input_pos
;
884 cmp_ctx
->input_pos
+= i
;
886 /* fill in the symbols table */
887 n_symbols
= generate_huffman_codes(leaf_nodes
,
888 cmp_mem
->internal_nodes
,
894 return intermediate_len
;
899 static ssize_t
write_huffman_table(uint16_t symbol_values
[512],
901 size_t available_size
)
905 if (available_size
< 256) {
906 return LZXPRESS_ERROR
;
909 for (i
= 0; i
< 256; i
++) {
911 uint16_t even
= symbol_values
[i
* 2];
912 uint16_t odd
= symbol_values
[i
* 2 + 1];
914 b
= bitlen_nonzero_16(even
);
917 b
|= bitlen_nonzero_16(odd
) << 4;
925 struct write_context
{
928 size_t head
; /* where lengths go */
929 size_t next_code
; /* where symbol stream goes */
930 size_t pending_next_code
; /* will be next_code */
936 * Write out 16 bits, little-endian, for write_huffman_codes()
938 * As you'll notice, there's a bit to do.
940 * We are collecting up bits in a uint32_t, then when there are 16 of them we
941 * write out a word into the stream, using a trio of offsets (wc->next_code,
942 * wc->pending_next_code, and wc->head) which dance around ensuring that the
943 * bitstream and the interspersed lengths are in the right places relative to
947 static inline bool write_bits(struct write_context
*wc
,
948 uint16_t code
, uint16_t length
)
952 wc
->bit_len
+= length
;
953 if (wc
->bit_len
> 16) {
954 uint32_t w
= wc
->bits
>> (wc
->bit_len
- 16);
956 if (wc
->next_code
+ 2 > wc
->dest_len
||
957 unlikely(wc
->bit_len
> 16)) {
960 wc
->dest
[wc
->next_code
] = w
& 0xff;
961 wc
->dest
[wc
->next_code
+ 1] = (w
>> 8) & 0xff;
962 wc
->next_code
= wc
->pending_next_code
;
963 wc
->pending_next_code
= wc
->head
;
970 static inline bool write_code(struct write_context
*wc
, uint16_t code
)
972 int code_bit_len
= bitlen_nonzero_16(code
);
973 if (unlikely(code
== 0)) {
976 code
&= (1 << code_bit_len
) - 1;
977 return write_bits(wc
, code
, code_bit_len
);
980 static inline bool write_byte(struct write_context
*wc
, uint8_t byte
)
982 if (wc
->head
+ 1 > wc
->dest_len
) {
985 wc
->dest
[wc
->head
] = byte
;
991 static inline bool write_long_len(struct write_context
*wc
, size_t len
)
994 if (wc
->head
+ 3 > wc
->dest_len
) {
997 wc
->dest
[wc
->head
] = 255;
998 wc
->dest
[wc
->head
+ 1] = len
& 255;
999 wc
->dest
[wc
->head
+ 2] = len
>> 8;
1002 if (wc
->head
+ 7 > wc
->dest_len
) {
1005 wc
->dest
[wc
->head
] = 255;
1006 wc
->dest
[wc
->head
+ 1] = 0;
1007 wc
->dest
[wc
->head
+ 2] = 0;
1008 wc
->dest
[wc
->head
+ 3] = len
& 255;
1009 wc
->dest
[wc
->head
+ 4] = (len
>> 8) & 255;
1010 wc
->dest
[wc
->head
+ 5] = (len
>> 16) & 255;
1011 wc
->dest
[wc
->head
+ 6] = (len
>> 24) & 255;
1017 static ssize_t
write_compressed_bytes(uint16_t symbol_values
[512],
1018 uint16_t *intermediate
,
1019 size_t intermediate_len
,
1026 struct write_context wc
= {
1028 .pending_next_code
= 2,
1030 .dest_len
= dest_len
1032 for (i
= 0; i
< intermediate_len
; i
++) {
1033 uint16_t c
= intermediate
[i
];
1036 uint16_t code_len
= 0;
1037 uint16_t code_dist
= 0;
1039 ok
= write_code(&wc
, symbol_values
[c
]);
1041 return LZXPRESS_ERROR
;
1047 if (i
> intermediate_len
- 4) {
1048 return LZXPRESS_ERROR
;
1051 len
= intermediate
[i
+ 1];
1052 len
|= intermediate
[i
+ 2] << 16U;
1053 distance
= intermediate
[i
+ 3];
1055 } else if (c
== 0xffff) {
1056 if (i
> intermediate_len
- 3) {
1057 return LZXPRESS_ERROR
;
1059 len
= intermediate
[i
+ 1];
1060 distance
= intermediate
[i
+ 2];
1063 return LZXPRESS_ERROR
;
1065 if (unlikely(distance
== 0)) {
1066 return LZXPRESS_ERROR
;
1068 /* len has already had 3 subtracted */
1071 * We are going to need to write extra length
1072 * bytes into the stream, but we don't do it
1073 * now, we do it after the code has been
1074 * written (and before the distance bits).
1080 code_dist
= bitlen_nonzero_16(distance
);
1081 c
= 256 | (code_dist
<< 4) | code_len
;
1083 return LZXPRESS_ERROR
;
1086 ok
= write_code(&wc
, symbol_values
[c
]);
1088 return LZXPRESS_ERROR
;
1091 if (code_len
== 15) {
1093 ok
= write_long_len(&wc
, len
);
1095 ok
= write_byte(&wc
, len
- 15);
1098 return LZXPRESS_ERROR
;
1101 if (code_dist
!= 0) {
1102 uint16_t dist_bits
= distance
- (1 << code_dist
);
1103 ok
= write_bits(&wc
, dist_bits
, code_dist
);
1105 return LZXPRESS_ERROR
;
1110 * There are some intricacies around flushing the bits and returning
1113 * If the returned length is not exactly right and there is another
1114 * block, that block will read its huffman table from the wrong place,
1115 * and have all the symbol codes out by a multiple of 4.
1118 if (wc
.bit_len
== 0) {
1121 ok
= write_bits(&wc
, 0, 16 - wc
.bit_len
);
1123 return LZXPRESS_ERROR
;
1125 for (i
= 0; i
< 2; i
++) {
1127 * Flush out the bits with zeroes. It doesn't matter if we do
1128 * a round too many, as we have buffer space, and have already
1129 * determined the returned length (end).
1131 ok
= write_bits(&wc
, 0, 16);
1133 return LZXPRESS_ERROR
;
1140 static ssize_t
lzx_huffman_compress_block(struct lzxhuff_compressor_context
*cmp_ctx
,
1141 struct lzxhuff_compressor_mem
*cmp_mem
,
1144 ssize_t intermediate_size
;
1145 uint16_t *hash_table
= NULL
;
1146 uint16_t *back_window_hash_table
= NULL
;
1147 ssize_t bytes_written
;
1149 if (cmp_ctx
->available_size
- cmp_ctx
->output_pos
< 260) {
1150 /* huffman block + 4 bytes */
1151 return LZXPRESS_ERROR
;
1155 * For LZ77 compression, we keep a hash table for the previous block,
1156 * via alternation after the first block.
1158 * LZ77 writes into the intermediate buffer in the cmp_mem context.
1160 if (block_no
== 0) {
1161 hash_table
= cmp_mem
->hash_table1
;
1162 back_window_hash_table
= NULL
;
1163 } else if (block_no
& 1) {
1164 hash_table
= cmp_mem
->hash_table2
;
1165 back_window_hash_table
= cmp_mem
->hash_table1
;
1167 hash_table
= cmp_mem
->hash_table1
;
1168 back_window_hash_table
= cmp_mem
->hash_table2
;
1171 intermediate_size
= lz77_encode_block(cmp_ctx
,
1174 back_window_hash_table
);
1176 if (intermediate_size
< 0) {
1177 return intermediate_size
;
1181 * Write the 256 byte Huffman table, based on the counts gained in
1184 bytes_written
= write_huffman_table(
1185 cmp_mem
->symbol_values
,
1186 cmp_ctx
->output
+ cmp_ctx
->output_pos
,
1187 cmp_ctx
->available_size
- cmp_ctx
->output_pos
);
1189 if (bytes_written
!= 256) {
1190 return LZXPRESS_ERROR
;
1192 cmp_ctx
->output_pos
+= 256;
1195 * Write the compressed bytes using the LZ77 matches and Huffman codes
1196 * worked out in the previous steps.
1198 bytes_written
= write_compressed_bytes(
1199 cmp_mem
->symbol_values
,
1200 cmp_mem
->intermediate
,
1202 cmp_ctx
->output
+ cmp_ctx
->output_pos
,
1203 cmp_ctx
->available_size
- cmp_ctx
->output_pos
);
1205 if (bytes_written
< 0) {
1206 return bytes_written
;
1209 cmp_ctx
->output_pos
+= bytes_written
;
1210 return bytes_written
;
1215 * lzxpress_huffman_compress_talloc()
1217 * This is the convenience function that allocates the compressor context and
1218 * output memory for you. The return value is the number of bytes written to
1219 * the location indicated by the output pointer.
1221 * The maximum input_size is effectively around 227MB due to the need to guess
1222 * an upper bound on the output size that hits an internal limitation in
1225 * @param mem_ctx TALLOC_CTX parent for the compressed buffer.
1226 * @param input_bytes memory to be compressed.
1227 * @param input_size length of the input buffer.
1228 * @param output destination pointer for the compressed data.
1230 * @return the number of bytes written or -1 on error.
1233 ssize_t
lzxpress_huffman_compress_talloc(TALLOC_CTX
*mem_ctx
,
1234 const uint8_t *input_bytes
,
1238 struct lzxhuff_compressor_mem
*cmp
= NULL
;
1240 * In the worst case, the output size should be about the same as the
1241 * input size, plus the 256 byte header per 64k block. We aim for
1242 * ample, but within the order of magnitude.
1244 size_t alloc_size
= input_size
+ (input_size
/ 8) + 270;
1245 ssize_t output_size
;
1247 *output
= talloc_array(mem_ctx
, uint8_t, alloc_size
);
1248 if (*output
== NULL
) {
1249 return LZXPRESS_ERROR
;
1252 cmp
= talloc(mem_ctx
, struct lzxhuff_compressor_mem
);
1254 TALLOC_FREE(*output
);
1255 return LZXPRESS_ERROR
;
1258 output_size
= lzxpress_huffman_compress(cmp
,
1266 if (output_size
< 0) {
1267 TALLOC_FREE(*output
);
1268 return LZXPRESS_ERROR
;
1271 *output
= talloc_realloc(mem_ctx
, *output
, uint8_t, output_size
);
1272 if (*output
== NULL
) {
1273 return LZXPRESS_ERROR
;
1280 * lzxpress_huffman_compress()
1282 * This is the inconvenience function, slightly faster and fiddlier than
1283 * lzxpress_huffman_compress_talloc().
1285 * To use this, you need to have allocated (but not initialised) a `struct
1286 * lzxhuff_compressor_context`, and an output buffer. If the buffer is not big
1287 * enough (per `output_size`), you'll get a negative return value, otherwise
1288 * the number of bytes actually consumed, which will always be at least 260.
1290 * The `struct lzxhuff_compressor_context` is reusable -- it is basically a
1291 * collection of uninitialised memory buffers. The total size is less than
1292 * 150k, so stack allocation is plausible.
1294 * input_size and available_size are limited to the minimum of UINT32_MAX and
1295 * SSIZE_MAX. On 64 bit machines that will be UINT32_MAX, or 4GB.
1297 * @param cmp_mem a struct lzxhuff_compressor_mem.
1298 * @param input_bytes memory to be compressed.
1299 * @param input_size length of the input buffer.
1300 * @param output destination for the compressed data.
1301 * @param available_size allocated output bytes.
1303 * @return the number of bytes written or -1 on error.
1305 ssize_t
lzxpress_huffman_compress(struct lzxhuff_compressor_mem
*cmp_mem
,
1306 const uint8_t *input_bytes
,
1309 size_t available_size
)
1312 struct lzxhuff_compressor_context cmp_ctx
= {
1313 .input_bytes
= input_bytes
,
1314 .input_size
= input_size
,
1316 .prev_block_pos
= 0,
1318 .available_size
= available_size
,
1322 if (input_size
== 0) {
1324 * We can't deal with this for a number of reasons (e.g. it
1325 * breaks the Huffman tree), and the output will be infinitely
1326 * bigger than the input. The caller needs to go and think
1327 * about what they're trying to do here.
1329 return LZXPRESS_ERROR
;
1332 if (input_size
> SSIZE_MAX
||
1333 input_size
> UINT32_MAX
||
1334 available_size
> SSIZE_MAX
||
1335 available_size
> UINT32_MAX
||
1336 available_size
== 0) {
1338 * We use negative ssize_t to return errors, which is limiting
1339 * on 32 bit machines; otherwise we adhere to Microsoft's 4GB
1342 * lzxpress_huffman_compress_talloc() will not get this far,
1343 * having already have failed on talloc's 256 MB limit.
1345 return LZXPRESS_ERROR
;
1348 if (cmp_mem
== NULL
||
1350 input_bytes
== NULL
) {
1351 return LZXPRESS_ERROR
;
1354 while (cmp_ctx
.input_pos
< cmp_ctx
.input_size
) {
1356 ret
= lzx_huffman_compress_block(&cmp_ctx
,
1365 return cmp_ctx
.output_pos
;
1368 static void debug_tree_codes(struct bitstream
*input
)
1374 size_t ffff_count
= 0;
1379 struct q queue
[65536];
1381 uint16_t *t
= input
->table
;
1382 queue
[0].tree_code
= 1;
1383 queue
[0].code_code
= 2;
1384 queue
[1].tree_code
= 2;
1385 queue
[1].code_code
= 3;
1386 while (head
< tail
) {
1387 struct q q
= queue
[head
];
1388 uint16_t x
= t
[q
.tree_code
];
1391 uint16_t j
= q
.code_code
;
1392 size_t offset
= bitlen_nonzero_16(j
) - 1;
1393 if (unlikely(j
== 0)) {
1394 DBG("BROKEN code is 0!\n");
1398 for (k
= 0; k
<= offset
; k
++) {
1399 bool b
= (j
>> (offset
- k
)) & 1;
1400 bits
[k
] = b
? '1' : '0';
1403 DBG("%03x %s\n", x
& 511, bits
);
1408 queue
[tail
].tree_code
= q
.tree_code
* 2 + 1;
1409 queue
[tail
].code_code
= q
.code_code
* 2;
1411 queue
[tail
].tree_code
= q
.tree_code
* 2 + 1 + 1;
1412 queue
[tail
].code_code
= q
.code_code
* 2 + 1;
1416 DBG("0xffff count: %zu\n", ffff_count
);
1420 * Determines the sort order of one prefix_code_symbol relative to another
1422 static int compare_uint16(const uint16_t *a
, const uint16_t *b
)
1434 static bool fill_decomp_table(struct bitstream
*input
)
1437 * There are 512 symbols, each encoded in 4 bits, which indicates
1438 * their depth in the Huffman tree. The even numbers get the lower
1439 * nibble of each byte, so that the byte hex values look backwards
1440 * (i.e. 0xab encodes b then a). These are allocated Huffman codes in
1441 * order of appearance, per depth.
1443 * For example, if the first two bytes were:
1447 * the first four codes have the lengths 3, 2, 3, 5.
1448 * Let's call them A, B, C, D.
1450 * Suppose there is no other codeword with length 1 (which is
1451 * necessarily true in this example) or 2, but there might be others
1452 * of length 3 or 4. Then we can say this about the codes:
1469 * B has the shortest code, so takes the leftmost branch, 00. That
1470 * ends the branch -- nothing else can start with 00. There are no
1471 * more 2s, so we look at the 3s, starting as far left as possible. So
1472 * A takes 010 and C takes 011. That means everything else has to
1473 * start with 1xx. We don't know how many codewords of length 3 or 4
1474 * there are; if there are none, D would end up with 10000, the
1475 * leftmost available code of length 5. If the compressor is any good,
1476 * there should be no unused leaf nodes left dangling at the end.
1478 * (this is "Canonical Huffman Coding").
1481 * But what symbols do these codes actually stand for?
1482 * --------------------------------------------------
1484 * Good question. The first 256 codes stand for the corresponding
1485 * literal bytes. The codes from 256 to 511 stand for LZ77 matches,
1486 * which have a distance and a length, encoded in a strange way that
1487 * isn't entirely the purview of this function.
1489 * What does the value 0 mean?
1490 * ---------------------------
1492 * The code does not occur. For example, if the next byte in the
1493 * example above was 0x07, that would give the byte 0x04 a 7-long
1494 * code, and no code to the 0x05 byte, which means we there is no way
1495 * we going to see a 5 in the decoded stream.
1497 * Isn't LZ77 + Huffman what zip/gzip/zlib do?
1498 * -------------------------------------------
1500 * Yes, DEFLATE is LZ77 + Huffman, but the details are quite different.
1502 uint16_t symbols
[512];
1503 uint16_t sort_mem
[512];
1504 size_t i
, n_symbols
;
1506 uint16_t len
, prev_len
;
1507 const uint8_t *table_bytes
= input
->bytes
+ input
->byte_pos
;
1509 if (input
->byte_pos
+ 260 > input
->byte_size
) {
1514 for (i
= 0; i
< 256; i
++) {
1515 uint16_t even
= table_bytes
[i
] & 15;
1516 uint16_t odd
= table_bytes
[i
] >> 4;
1518 symbols
[n_symbols
] = (even
<< 9) + i
* 2;
1522 symbols
[n_symbols
] = (odd
<< 9) + i
* 2 + 1;
1526 input
->byte_pos
+= 256;
1527 if (n_symbols
== 0) {
1531 stable_sort(symbols
, sort_mem
, n_symbols
, sizeof(uint16_t),
1532 (samba_compare_fn_t
)compare_uint16
);
1535 * we're using an implicit binary tree, as you'd see in a heap.
1539 * table[3] = '00' <-- '00' and '01' are children of '0'
1540 * table[4] = '01' <-- '0' is [0], children are [0 * 2 + {1,2}]
1549 * table[1 << n - 1] = '0' * n
1550 * table[1 << n - 1 + x] = n-bit wide x (left padded with '0')
1551 * table[1 << n - 2] = '1' * (n - 1)
1553 * table[i]->left = table[i*2 + 1]
1554 * table[i]->right = table[i*2 + 2]
1555 * table[0xffff] = unused (16 '0's, max len is 15)
1557 * therefore e.g. table[70] = table[64 - 1 + 7]
1558 * = table[1 << 6 - 1 + 7]
1559 * = '000111' (binary 7, widened to 6 bits)
1561 * and if '000111' is a code,
1562 * '00011', '0001', '000', '00', '0' are unavailable prefixes.
1563 * 34 16 7 3 1 are their indices
1564 * and (i - 1) >> 1 is the path back from 70 through these.
1568 * 1 start with i = 0
1569 * 2 extract a symbol bit (i = (i << 1) + bit + 1)
1570 * 3 is table[i] == 0xffff?
1572 * 4 table[i] & 511 is the symbol, stop
1574 * and the construction (here) is sort of the reverse.
1576 * Most of this table is free space that can never be reached, and
1577 * most of the activity is at the beginning (since all codes start
1578 * there, and by design the shortest codes are the most common).
1580 for (i
= 0; i
< 32; i
++) {
1581 /* prefill the table head */
1582 input
->table
[i
] = 0xffff;
1586 for (i
= 0; i
< n_symbols
; i
++) {
1587 uint16_t s
= symbols
[i
];
1589 len
= (s
>> 9) & 15;
1592 while (len
!= prev_len
) {
1598 if (code
>= 65535) {
1601 input
->table
[code
] = s
;
1602 for(prefix
= (code
- 1) >> 1;
1604 prefix
= (prefix
- 1) >> 1) {
1605 input
->table
[prefix
] = 0xffff;
1608 if (CHECK_DEBUGLVL(10)) {
1609 debug_tree_codes(input
);
1613 * check that the last code encodes 11111..., with right number of
1614 * ones, pointing to the right symbol -- otherwise we have a dangling
1615 * uninitialised symbol.
1617 if (code
!= (1 << (len
+ 1)) - 2) {
1624 #define CHECK_READ_32(dest) \
1626 if (input->byte_pos + 4 > input->byte_size) { \
1627 return LZXPRESS_ERROR; \
1629 dest = PULL_LE_U32(input->bytes, input->byte_pos); \
1630 input->byte_pos += 4; \
1633 #define CHECK_READ_16(dest) \
1635 if (input->byte_pos + 2 > input->byte_size) { \
1636 return LZXPRESS_ERROR; \
1638 dest = PULL_LE_U16(input->bytes, input->byte_pos); \
1639 input->byte_pos += 2; \
1642 #define CHECK_READ_8(dest) \
1644 if (input->byte_pos >= input->byte_size) { \
1645 return LZXPRESS_ERROR; \
1647 dest = PULL_LE_U8(input->bytes, input->byte_pos); \
1648 input->byte_pos++; \
1652 static inline ssize_t
pull_bits(struct bitstream
*input
)
1654 if (input
->byte_pos
+ 1 < input
->byte_size
) {
1657 input
->remaining_bits
+= 16;
1660 } else if (input
->byte_pos
< input
->byte_size
) {
1663 input
->remaining_bits
+= 8;
1667 return LZXPRESS_ERROR
;
1674 * Decompress a block. The actual decompressed size is returned (or -1 on
1675 * error). The putative block length is 64k (or shorter, if the message ends
1676 * first), but a match can run over the end, extending the block. That's why
1677 * we need the overall output size as well as the block size. A match encoded
1678 * in this block can point back to previous blocks, but not before the
1679 * beginning of the message, so we also need the previously decoded size.
1681 * The compressed block will have 256 bytes for the Huffman table, and at
1682 * least 4 bytes of (possibly padded) encoded values.
1684 static ssize_t
lzx_huffman_decompress_block(struct bitstream
*input
,
1688 size_t previous_size
)
1690 size_t output_pos
= 0;
1693 uint16_t distance_bits_wanted
= 0;
1694 size_t distance
= 0;
1698 bool seen_eof_marker
= false;
1700 ok
= fill_decomp_table(input
);
1702 return LZXPRESS_ERROR
;
1704 if (CHECK_DEBUGLVL(10) || DEBUG_HUFFMAN_TREE
) {
1705 debug_huffman_tree_from_table(input
->table
);
1708 * Always read 32 bits at the start, even if we don't need them.
1711 CHECK_READ_16(input
->bits
);
1712 input
->bits
|= tmp
<< 16;
1713 input
->remaining_bits
= 32;
1716 * This loop iterates over individual *bits*. These are read from
1717 * little-endian 16 bit words, most significant bit first.
1719 * At points in the bitstream, the following are possible:
1721 * # the source word is empty and needs to be refilled from the input
1723 * # an incomplete codeword is being extended.
1724 * # a codeword is resolved, either as a literal or a match.
1725 * # a literal is written.
1726 * # a match is collecting distance bits.
1727 * # the output stream is copied, as specified by a match.
1728 * # input bytes are read for match lengths.
1730 * Note that we *don't* specifically check for the EOF marker (symbol
1731 * 256) in this loop, because the a precondition for stopping for the
1732 * EOF marker is that the output buffer is full (otherwise, you
1733 * wouldn't know which 256 is EOF, rather than an actual symbol), and
1734 * we *always* want to stop when the buffer is full. So we work out if
1735 * there is an EOF in in another loop after we stop writing.
1739 while (output_pos
< block_size
) {
1741 if (input
->remaining_bits
== 16) {
1742 ssize_t ret
= pull_bits(input
);
1747 input
->remaining_bits
--;
1749 b
= (input
->bits
>> input
->remaining_bits
) & 1;
1751 /* not in a match; pulling a codeword */
1754 if (input
->table
[index
] == 0xffff) {
1755 /* incomplete codeword, the common case */
1758 /* found the symbol, reset the code string */
1759 symbol
= input
->table
[index
] & 511;
1762 /* a literal, the easy case */
1763 output
[output_pos
] = symbol
;
1768 /* the beginning of a match */
1769 distance_bits_wanted
= (symbol
>> 4) & 15;
1770 distance
= 1 << distance_bits_wanted
;
1771 length
= symbol
& 15;
1775 if (length
== 255 + 15) {
1777 * note, we discard (don't add) the
1780 CHECK_READ_16(length
);
1782 CHECK_READ_32(length
);
1788 /* we are pulling extra distance bits */
1789 distance_bits_wanted
--;
1790 distance
|= b
<< distance_bits_wanted
;
1793 if (distance_bits_wanted
== 0) {
1795 * We have a complete match, and it is time to do the
1796 * copy (byte by byte, because the ranges can overlap,
1797 * and we might need to copy bytes we just copied in).
1799 * It is possible that this match will extend beyond
1800 * the end of the expected block. That's fine, so long
1801 * as it doesn't extend past the total output size.
1804 size_t end
= output_pos
+ length
;
1805 uint8_t *here
= output
+ output_pos
;
1806 uint8_t *there
= here
- distance
;
1807 if (end
> output_size
||
1808 previous_size
+ output_pos
< distance
||
1809 unlikely(end
< output_pos
|| there
> here
)) {
1810 return LZXPRESS_ERROR
;
1812 for (i
= 0; i
< length
; i
++) {
1815 output_pos
+= length
;
1821 if (length
!= 0 || index
!= 0) {
1822 /* it seems like we've hit an early end, mid-code */
1823 return LZXPRESS_ERROR
;
1826 if (input
->byte_pos
+ 256 < input
->byte_size
) {
1828 * This block is over, but it clearly isn't the last block, so
1829 * we don't want to look for the EOF.
1834 * We won't write any more, but we try to read some more to make sure
1835 * we're finishing in a good place. That means we want to see a 256
1836 * symbol and then some number of zeroes, possibly zero, but as many
1839 * In this we are perhaps a bit stricter than Windows, which
1840 * apparently does not insist on the EOF marker, nor on a lack of
1845 if (input
->remaining_bits
== 16) {
1847 if (input
->byte_pos
== input
->byte_size
) {
1851 ret
= pull_bits(input
);
1856 input
->remaining_bits
--;
1857 b
= (input
->bits
>> input
->remaining_bits
) & 1;
1858 if (seen_eof_marker
) {
1860 * we have read an EOF symbols. Now we just want to
1864 return LZXPRESS_ERROR
;
1869 /* we're pulling in a symbol, which had better be 256 */
1872 if (input
->table
[index
] == 0xffff) {
1876 symbol
= input
->table
[index
] & 511;
1877 if (symbol
!= 256) {
1878 return LZXPRESS_ERROR
;
1880 seen_eof_marker
= true;
1884 if (! seen_eof_marker
) {
1885 return LZXPRESS_ERROR
;
1891 static ssize_t
lzxpress_huffman_decompress_internal(struct bitstream
*input
,
1895 size_t output_pos
= 0;
1897 if (input
->byte_size
< 260) {
1898 return LZXPRESS_ERROR
;
1901 while (input
->byte_pos
< input
->byte_size
) {
1902 ssize_t block_output_pos
;
1903 ssize_t block_output_size
;
1904 size_t remaining_output_size
= output_size
- output_pos
;
1906 block_output_size
= MIN(65536, remaining_output_size
);
1908 block_output_pos
= lzx_huffman_decompress_block(
1910 output
+ output_pos
,
1912 remaining_output_size
,
1915 if (block_output_pos
< block_output_size
) {
1916 return LZXPRESS_ERROR
;
1918 output_pos
+= block_output_pos
;
1919 if (output_pos
> output_size
) {
1920 /* not expecting to get here. */
1921 return LZXPRESS_ERROR
;
1925 if (input
->byte_pos
!= input
->byte_size
) {
1926 return LZXPRESS_ERROR
;
1934 * lzxpress_huffman_decompress()
1936 * output_size must be the expected length of the decompressed data.
1937 * input_size and output_size are limited to the minimum of UINT32_MAX and
1938 * SSIZE_MAX. On 64 bit machines that will be UINT32_MAX, or 4GB.
1940 * @param input_bytes memory to be decompressed.
1941 * @param input_size length of the compressed buffer.
1942 * @param output destination for the decompressed data.
1943 * @param output_size exact expected length of the decompressed data.
1945 * @return the number of bytes written or -1 on error.
1948 ssize_t
lzxpress_huffman_decompress(const uint8_t *input_bytes
,
1953 uint16_t table
[65536];
1954 struct bitstream input
= {
1955 .bytes
= input_bytes
,
1956 .byte_size
= input_size
,
1959 .remaining_bits
= 0,
1963 if (input_size
> SSIZE_MAX
||
1964 input_size
> UINT32_MAX
||
1965 output_size
> SSIZE_MAX
||
1966 output_size
> UINT32_MAX
||
1969 input_bytes
== NULL
||
1972 * We use negative ssize_t to return errors, which is limiting
1973 * on 32 bit machines, and the 4GB limit exists on Windows.
1975 return LZXPRESS_ERROR
;
1978 return lzxpress_huffman_decompress_internal(&input
,
1985 * lzxpress_huffman_decompress_talloc()
1987 * The caller must provide the exact size of the expected output.
1989 * The input_size is limited to the minimum of UINT32_MAX and SSIZE_MAX, but
1990 * output_size is limited to 256MB due to a limit in talloc. This effectively
1991 * limits input_size too, as non-crafted compressed data will not exceed the
1992 * decompressed size by very much.
1994 * @param mem_ctx TALLOC_CTX parent for the decompressed buffer.
1995 * @param input_bytes memory to be decompressed.
1996 * @param input_size length of the compressed buffer.
1997 * @param output_size expected decompressed size.
1999 * @return a talloc'ed buffer exactly output_size in length, or NULL.
2002 uint8_t *lzxpress_huffman_decompress_talloc(TALLOC_CTX
*mem_ctx
,
2003 const uint8_t *input_bytes
,
2008 uint8_t *output
= NULL
;
2009 struct bitstream input
= {
2010 .bytes
= input_bytes
,
2011 .byte_size
= input_size
2014 output
= talloc_array(mem_ctx
, uint8_t, output_size
);
2015 if (output
== NULL
) {
2019 input
.table
= talloc_array(mem_ctx
, uint16_t, 65536);
2020 if (input
.table
== NULL
) {
2021 talloc_free(output
);
2024 result
= lzxpress_huffman_decompress_internal(&input
,
2027 talloc_free(input
.table
);
2029 if (result
!= output_size
) {
2030 talloc_free(output
);