1 ///////////////////////////////////////////////////////////////////////////////
3 /// \file lzip_decoder.c
4 /// \brief Decodes .lz (lzip) files
6 // Author: Michał Górny
9 // This file has been put into the public domain.
10 // You can do whatever you want with this file.
12 ///////////////////////////////////////////////////////////////////////////////
14 #include "lzip_decoder.h"
15 #include "lzma_decoder.h"
// .lz format version 0 lacks the 64-bit Member size field in the footer.
// The footer holds CRC32 (4 bytes) and Data size (8 bytes), followed in
// version 1 by Member size (8 bytes).
#define LZIP_V0_FOOTER_SIZE 12
#define LZIP_V1_FOOTER_SIZE 20
#define LZIP_FOOTER_SIZE_MAX LZIP_V1_FOOTER_SIZE

// lc/lp/pb are hardcoded in the .lz format.
#define LZIP_LC 3
#define LZIP_LP 0
#define LZIP_PB 2
40 /// .lz member format version
43 /// CRC32 of the uncompressed data in the .lz member
46 /// Uncompressed size of the .lz member
47 uint64_t uncompressed_size
;
49 /// Compressed size of the .lz member
52 /// Memory usage limit
55 /// Amount of memory actually needed
58 /// If true, LZMA_GET_CHECK is returned after decoding the header
59 /// fields. As all files use CRC32 this is redundant but it's
60 /// implemented anyway since the initialization functions supports
61 /// all other flags in addition to LZMA_TELL_ANY_CHECK.
64 /// If true, we won't calculate or verify the CRC32 of
65 /// the uncompressed data.
68 /// If true, we will decode concatenated .lz members and stop if
69 /// non-.lz data is seen after at least one member has been
70 /// successfully decoded.
73 /// When decoding concatenated .lz members, this is true as long as
74 /// we are decoding the first .lz member. This is needed to avoid
75 /// incorrect LZMA_FORMAT_ERROR in case there is non-.lz data at
76 /// the end of the file.
79 /// Reading position in the header and footer fields
82 /// Buffer to hold the .lz footer fields
83 uint8_t buffer
[LZIP_FOOTER_SIZE_MAX
];
85 /// Options decoded from the .lz header that needed to initialize
86 /// the LZMA1 decoder.
87 lzma_options_lzma options
;
90 lzma_next_coder lzma_decoder
;
96 lzip_decode(void *coder_ptr
, const lzma_allocator
*allocator
,
97 const uint8_t *restrict in
, size_t *restrict in_pos
,
98 size_t in_size
, uint8_t *restrict out
,
99 size_t *restrict out_pos
, size_t out_size
, lzma_action action
)
101 lzma_lzip_coder
*coder
= coder_ptr
;
104 switch (coder
->sequence
) {
105 case SEQ_ID_STRING
: {
106 // The "ID string" or magic bytes are "LZIP" in US-ASCII.
107 const uint8_t lzip_id_string
[4] = { 0x4C, 0x5A, 0x49, 0x50 };
109 while (coder
->pos
< sizeof(lzip_id_string
)) {
110 if (*in_pos
>= in_size
) {
111 // If we are on the 2nd+ concatenated member
112 // and the input ends before we can read
113 // the magic bytes, we discard the bytes that
114 // were already read (up to 3) and finish.
115 // See the reasoning below.
116 return !coder
->first_member
117 && action
== LZMA_FINISH
118 ? LZMA_STREAM_END
: LZMA_OK
;
121 if (in
[*in_pos
] != lzip_id_string
[coder
->pos
]) {
122 // The .lz format allows putting non-.lz data
123 // at the end of the file. If we have seen
124 // at least one valid .lz member already,
125 // then we won't consume the byte at *in_pos
126 // and will return LZMA_STREAM_END. This way
127 // apps can easily locate and read the non-.lz
128 // data after the .lz member(s).
130 // NOTE: If the first 1-3 bytes of the non-.lz
131 // data match the .lz ID string then the first
132 // 1-3 bytes of the junk will get ignored by
133 // us. If apps want to properly locate the
134 // trailing data they must ensure that the
135 // first byte of their custom data isn't the
136 // same as the first byte of .lz ID string.
137 // With the liblzma API we cannot rewind the
138 // input position across calls to lzma_code().
139 return !coder
->first_member
140 ? LZMA_STREAM_END
: LZMA_FORMAT_ERROR
;
150 coder
->uncompressed_size
= 0;
151 coder
->member_size
= sizeof(lzip_id_string
);
153 coder
->sequence
= SEQ_VERSION
;
159 if (*in_pos
>= in_size
)
162 coder
->version
= in
[(*in_pos
)++];
164 // We support version 0 and unextended version 1.
165 if (coder
->version
> 1)
166 return LZMA_OPTIONS_ERROR
;
168 ++coder
->member_size
;
169 coder
->sequence
= SEQ_DICT_SIZE
;
171 // .lz versions 0 and 1 use CRC32 as the integrity check
172 // so if the application wanted to know that
173 // (LZMA_TELL_ANY_CHECK) we can tell it now.
174 if (coder
->tell_any_check
)
175 return LZMA_GET_CHECK
;
179 case SEQ_DICT_SIZE
: {
180 if (*in_pos
>= in_size
)
183 const uint32_t ds
= in
[(*in_pos
)++];
184 ++coder
->member_size
;
186 // The five lowest bits are for the base-2 logarithm of
187 // the dictionary size and the highest three bits are
188 // the fractional part (0/16 to 7/16) that will be
189 // substracted to get the final value.
191 // For example, with 0xB5:
194 // dict_size = 2^21 - 2^21 * 5 / 16 = 1408 KiB
195 const uint32_t b2log
= ds
& 0x1F;
196 const uint32_t fracnum
= ds
>> 5;
198 // The format versions 0 and 1 allow dictionary size in the
199 // range [4 KiB, 512 MiB].
200 if (b2log
< 12 || b2log
> 29 || (b2log
== 12 && fracnum
> 0))
201 return LZMA_DATA_ERROR
;
203 // 2^[b2log] - 2^[b2log] * [fracnum] / 16
204 // = 2^[b2log] - [fracnum] * 2^([b2log] - 4)
205 coder
->options
.dict_size
= (UINT32_C(1) << b2log
)
206 - (fracnum
<< (b2log
- 4));
208 assert(coder
->options
.dict_size
>= 4096);
209 assert(coder
->options
.dict_size
<= (UINT32_C(512) << 20));
211 coder
->options
.preset_dict
= NULL
;
212 coder
->options
.lc
= LZIP_LC
;
213 coder
->options
.lp
= LZIP_LP
;
214 coder
->options
.pb
= LZIP_PB
;
216 // Calculate the memory usage.
217 coder
->memusage
= lzma_lzma_decoder_memusage(&coder
->options
)
218 + LZMA_MEMUSAGE_BASE
;
220 // Initialization is a separate step because if we return
221 // LZMA_MEMLIMIT_ERROR we need to be able to restart after
222 // the memlimit has been increased.
223 coder
->sequence
= SEQ_CODER_INIT
;
228 case SEQ_CODER_INIT
: {
229 if (coder
->memusage
> coder
->memlimit
)
230 return LZMA_MEMLIMIT_ERROR
;
232 const lzma_filter_info filters
[2] = {
234 .id
= LZMA_FILTER_LZMA1
,
235 .init
= &lzma_lzma_decoder_init
,
236 .options
= &coder
->options
,
242 return_if_error(lzma_next_filter_init(&coder
->lzma_decoder
,
243 allocator
, filters
));
246 coder
->sequence
= SEQ_LZMA_STREAM
;
251 case SEQ_LZMA_STREAM
: {
252 const size_t in_start
= *in_pos
;
253 const size_t out_start
= *out_pos
;
255 const lzma_ret ret
= coder
->lzma_decoder
.code(
256 coder
->lzma_decoder
.coder
, allocator
,
257 in
, in_pos
, in_size
, out
, out_pos
, out_size
,
260 const size_t out_used
= *out_pos
- out_start
;
262 coder
->member_size
+= *in_pos
- in_start
;
263 coder
->uncompressed_size
+= out_used
;
265 if (!coder
->ignore_check
)
266 coder
->crc32
= lzma_crc32(out
+ out_start
, out_used
,
269 if (ret
!= LZMA_STREAM_END
)
272 coder
->sequence
= SEQ_MEMBER_FOOTER
;
277 case SEQ_MEMBER_FOOTER
: {
278 // The footer of .lz version 0 lacks the Member size field.
279 // This is the only difference between version 0 and
280 // unextended version 1 formats.
281 const size_t footer_size
= coder
->version
== 0
282 ? LZIP_V0_FOOTER_SIZE
283 : LZIP_V1_FOOTER_SIZE
;
285 // Copy the CRC32, Data size, and Member size fields to
286 // the internal buffer.
287 lzma_bufcpy(in
, in_pos
, in_size
, coder
->buffer
, &coder
->pos
,
290 // Return if we didn't get the whole footer yet.
291 if (coder
->pos
< footer_size
)
295 coder
->member_size
+= footer_size
;
297 // Check that the footer fields match the observed data.
298 if (!coder
->ignore_check
299 && coder
->crc32
!= read32le(&coder
->buffer
[0]))
300 return LZMA_DATA_ERROR
;
302 if (coder
->uncompressed_size
!= read64le(&coder
->buffer
[4]))
303 return LZMA_DATA_ERROR
;
305 if (coder
->version
> 0) {
306 // .lz version 0 has no Member size field.
307 if (coder
->member_size
!= read64le(&coder
->buffer
[12]))
308 return LZMA_DATA_ERROR
;
311 // Decoding is finished if we weren't requested to decode
312 // more than one .lz member.
313 if (!coder
->concatenated
)
314 return LZMA_STREAM_END
;
316 coder
->first_member
= false;
317 coder
->sequence
= SEQ_ID_STRING
;
323 return LZMA_PROG_ERROR
;
331 lzip_decoder_end(void *coder_ptr
, const lzma_allocator
*allocator
)
333 lzma_lzip_coder
*coder
= coder_ptr
;
334 lzma_next_end(&coder
->lzma_decoder
, allocator
);
335 lzma_free(coder
, allocator
);
341 lzip_decoder_get_check(const void *coder_ptr
lzma_attribute((__unused__
)))
343 return LZMA_CHECK_CRC32
;
348 lzip_decoder_memconfig(void *coder_ptr
, uint64_t *memusage
,
349 uint64_t *old_memlimit
, uint64_t new_memlimit
)
351 lzma_lzip_coder
*coder
= coder_ptr
;
353 *memusage
= coder
->memusage
;
354 *old_memlimit
= coder
->memlimit
;
356 if (new_memlimit
!= 0) {
357 if (new_memlimit
< coder
->memusage
)
358 return LZMA_MEMLIMIT_ERROR
;
360 coder
->memlimit
= new_memlimit
;
368 lzma_lzip_decoder_init(
369 lzma_next_coder
*next
, const lzma_allocator
*allocator
,
370 uint64_t memlimit
, uint32_t flags
)
372 lzma_next_coder_init(&lzma_lzip_decoder_init
, next
, allocator
);
374 if (flags
& ~LZMA_SUPPORTED_FLAGS
)
375 return LZMA_OPTIONS_ERROR
;
377 lzma_lzip_coder
*coder
= next
->coder
;
379 coder
= lzma_alloc(sizeof(lzma_lzip_coder
), allocator
);
381 return LZMA_MEM_ERROR
;
384 next
->code
= &lzip_decode
;
385 next
->end
= &lzip_decoder_end
;
386 next
->get_check
= &lzip_decoder_get_check
;
387 next
->memconfig
= &lzip_decoder_memconfig
;
389 coder
->lzma_decoder
= LZMA_NEXT_CODER_INIT
;
392 coder
->sequence
= SEQ_ID_STRING
;
393 coder
->memlimit
= my_max(1, memlimit
);
394 coder
->memusage
= LZMA_MEMUSAGE_BASE
;
395 coder
->tell_any_check
= (flags
& LZMA_TELL_ANY_CHECK
) != 0;
396 coder
->ignore_check
= (flags
& LZMA_IGNORE_CHECK
) != 0;
397 coder
->concatenated
= (flags
& LZMA_CONCATENATED
) != 0;
398 coder
->first_member
= true;
405 extern LZMA_API(lzma_ret
)
406 lzma_lzip_decoder(lzma_stream
*strm
, uint64_t memlimit
, uint32_t flags
)
408 lzma_next_strm_init(lzma_lzip_decoder_init
, strm
, memlimit
, flags
);
410 strm
->internal
->supported_actions
[LZMA_RUN
] = true;
411 strm
->internal
->supported_actions
[LZMA_FINISH
] = true;