4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 /* liblzma is a complex interface, so abstract it here. */
45 #include <sys/types.h>
47 #include <nbdkit-filter.h>
55 #define XZ_HEADER_MAGIC "\xfd" "7zXZ\0"
56 #define XZ_HEADER_MAGIC_LEN 6
57 #define XZ_FOOTER_MAGIC "YZ"
58 #define XZ_FOOTER_MAGIC_LEN 2
64 uint64_t max_uncompressed_block_size
;
67 static bool check_header_magic (nbdkit_next
*next
);
68 static lzma_index
*parse_indexes (nbdkit_next
*next
, size_t *);
69 static int iter_indexes (lzma_index
*idx
, size_t *, uint64_t *);
72 xzfile_open (nbdkit_next
*next
)
77 xz
= malloc (sizeof *xz
);
79 nbdkit_error ("malloc: %m");
83 /* Check file magic. */
84 if (!check_header_magic (next
)) {
85 nbdkit_error ("xz: not an xz file");
89 /* Read and parse the indexes. */
90 xz
->idx
= parse_indexes (next
, &xz
->nr_streams
);
94 /* Iterate over indexes to find the number of blocks and the largest block. */
95 if (iter_indexes (xz
->idx
,
96 &xz
->nr_blocks
, &xz
->max_uncompressed_block_size
) == -1)
99 size
= lzma_index_uncompressed_size (xz
->idx
);
100 nbdkit_debug ("xz: size %" PRIu64
" bytes (%.1fM)",
101 size
, size
/ 1024.0 / 1024.0);
102 nbdkit_debug ("xz: %zu streams, %zu blocks", xz
->nr_streams
, xz
->nr_blocks
);
103 nbdkit_debug ("xz: maximum uncompressed block size %" PRIu64
" bytes (%.1fM)",
104 xz
->max_uncompressed_block_size
,
105 xz
->max_uncompressed_block_size
/ 1024.0 / 1024.0);
115 check_header_magic (nbdkit_next
*next
)
117 char buf
[XZ_HEADER_MAGIC_LEN
];
120 if (next
->get_size (next
) < XZ_HEADER_MAGIC_LEN
) {
121 nbdkit_error ("xz: file too short");
124 if (next
->pread (next
, buf
, XZ_HEADER_MAGIC_LEN
, 0, 0, &err
) == -1) {
125 nbdkit_error ("xz: could not read header magic: error %d", err
);
128 if (memcmp (buf
, XZ_HEADER_MAGIC
, XZ_HEADER_MAGIC_LEN
) != 0)
133 /* For explanation of this function, see src/xz/list.c:parse_indexes
137 parse_indexes (nbdkit_next
*next
,
141 int64_t size
, pos
, index_size
, offs
;
143 uint8_t footer
[LZMA_STREAM_HEADER_SIZE
];
144 uint8_t header
[LZMA_STREAM_HEADER_SIZE
];
145 lzma_stream_flags footer_flags
;
146 lzma_stream_flags header_flags
;
147 lzma_stream strm
= LZMA_STREAM_INIT
;
148 lzma_index
*combined_index
= NULL
;
149 lzma_index
*this_index
= NULL
;
150 lzma_vli stream_padding
= 0;
154 /* Check file size is a multiple of 4 bytes. */
155 pos
= size
= next
->get_size (next
);
157 nbdkit_error ("xz: get_size: %m");
160 if ((pos
& 3) != 0) {
161 nbdkit_error ("xz: not an xz file: size is not a multiple of 4 bytes");
165 /* Jump backwards through the file identifying each stream. */
167 nbdkit_debug ("looping through streams: pos = %" PRIi64
, pos
);
169 if (pos
< LZMA_STREAM_HEADER_SIZE
) {
170 nbdkit_error ("xz: corrupted file at %" PRIi64
, pos
);
174 if (next
->pread (next
, footer
, LZMA_STREAM_HEADER_SIZE
,
175 pos
- LZMA_STREAM_HEADER_SIZE
, 0, &err
) == -1) {
176 nbdkit_error ("xz: read stream footer: error %d", err
);
179 /* Skip stream padding. */
180 if (footer
[8] == 0 && footer
[9] == 0 &&
181 footer
[10] == 0 && footer
[11] == 0) {
187 pos
-= LZMA_STREAM_HEADER_SIZE
;
190 nbdkit_debug ("decode stream footer at pos = %" PRIi64
, pos
);
192 /* Does the stream footer look reasonable? */
193 r
= lzma_stream_footer_decode (&footer_flags
, footer
);
195 nbdkit_error ("xz: invalid stream footer (error %d)", r
);
198 nbdkit_debug ("backward_size = %" PRIu64
,
199 (uint64_t) footer_flags
.backward_size
);
200 index_size
= footer_flags
.backward_size
;
201 if (pos
< index_size
+ LZMA_STREAM_HEADER_SIZE
) {
202 nbdkit_error ("xz: invalid stream footer");
207 nbdkit_debug ("decode index at pos = %" PRIi64
, pos
);
209 /* Decode the index. */
210 r
= lzma_index_decoder (&strm
, &this_index
, UINT64_MAX
);
212 nbdkit_error ("xz: invalid stream index (error %d)", r
);
220 strm
.avail_in
= index_size
;
221 if (strm
.avail_in
> BUFSIZ
)
222 strm
.avail_in
= BUFSIZ
;
223 if (pos
+ strm
.avail_in
> size
)
224 strm
.avail_in
= size
- pos
;
226 if (next
->pread (next
, buf
, strm
.avail_in
, offs
, 0, &err
) == -1) {
227 nbdkit_error ("xz: read index: error %d", err
);
230 offs
+= strm
.avail_in
;
231 index_size
-= strm
.avail_in
;
234 r
= lzma_code (&strm
, LZMA_RUN
);
235 } while (r
== LZMA_OK
);
237 if (r
!= LZMA_STREAM_END
) {
238 nbdkit_error ("xz: could not parse index (error %d)", r
);
242 pos
-= lzma_index_total_size (this_index
) + LZMA_STREAM_HEADER_SIZE
;
244 nbdkit_debug ("decode stream header at pos = %" PRIi64
, pos
);
246 /* Read and decode the stream header. */
247 if (next
->pread (next
, header
, LZMA_STREAM_HEADER_SIZE
, pos
, 0,
249 nbdkit_error ("xz: read stream header: error %d", err
);
253 r
= lzma_stream_header_decode (&header_flags
, header
);
255 nbdkit_error ("xz: invalid stream header (error %d)", r
);
259 /* Header and footer of the stream should be equal. */
260 r
= lzma_stream_flags_compare (&header_flags
, &footer_flags
);
262 nbdkit_error ("xz: header and footer of stream are not equal (error %d)",
267 /* Store the decoded stream flags in this_index. */
268 r
= lzma_index_stream_flags (this_index
, &footer_flags
);
270 nbdkit_error ("xz: cannot read stream_flags from index (error %d)", r
);
274 /* Store the amount of stream padding so far. Needed to calculate
275 * compressed offsets correctly in multi-stream files.
277 r
= lzma_index_stream_padding (this_index
, stream_padding
);
279 nbdkit_error ("xz: cannot set stream_padding in index (error %d)", r
);
283 if (combined_index
!= NULL
) {
284 r
= lzma_index_cat (this_index
, combined_index
, NULL
);
286 nbdkit_error ("xz: cannot combine indexes");
291 combined_index
= this_index
;
297 return combined_index
;
301 lzma_index_end (this_index
, NULL
);
302 lzma_index_end (combined_index
, NULL
);
306 /* Iterate over the indexes to find the number of blocks and
310 iter_indexes (lzma_index
*idx
,
311 size_t *nr_blocks
, uint64_t *max_uncompressed_block_size
)
313 lzma_index_iter iter
;
316 *max_uncompressed_block_size
= 0;
318 lzma_index_iter_init (&iter
, idx
);
319 while (!lzma_index_iter_next (&iter
, LZMA_INDEX_ITER_NONEMPTY_BLOCK
)) {
320 if (iter
.block
.uncompressed_size
> *max_uncompressed_block_size
)
321 *max_uncompressed_block_size
= iter
.block
.uncompressed_size
;
329 xzfile_close (xzfile
*xz
)
332 lzma_index_end (xz
->idx
, NULL
);
338 xzfile_max_uncompressed_block_size (xzfile
*xz
)
340 return xz
->max_uncompressed_block_size
;
344 xzfile_get_size (xzfile
*xz
)
346 return lzma_index_uncompressed_size (xz
->idx
);
350 xzfile_read_block (xzfile
*xz
,
352 uint32_t flags
, int *err
,
354 uint64_t *start_rtn
, uint64_t *size_rtn
)
357 lzma_index_iter iter
;
358 uint8_t header
[LZMA_BLOCK_HEADER_SIZE_MAX
];
360 lzma_filter filters
[LZMA_FILTERS_MAX
+ 1];
362 lzma_stream strm
= LZMA_STREAM_INIT
;
363 const size_t bufsize
= 1024 * 1024;
364 CLEANUP_FREE
unsigned char *buf
= NULL
;
368 /* Read the total size of the underlying disk, so we don't
371 size
= next
->get_size (next
);
373 nbdkit_error ("xz: get_size: %m");
377 /* Locate the block containing the uncompressed offset. */
378 lzma_index_iter_init (&iter
, xz
->idx
);
379 if (lzma_index_iter_locate (&iter
, offset
)) {
380 nbdkit_error ("cannot find offset %" PRIu64
" in the xz file", offset
);
384 *start_rtn
= iter
.block
.uncompressed_file_offset
;
385 *size_rtn
= iter
.block
.uncompressed_size
;
387 nbdkit_debug ("seek: block number %d at file offset %" PRIu64
,
388 (int) iter
.block
.number_in_file
,
389 (uint64_t) iter
.block
.compressed_file_offset
);
391 /* Read the block header. Start by reading a single byte which
392 * tells us how big the block header is.
394 offs
= iter
.block
.compressed_file_offset
;
395 if (next
->pread (next
, header
, 1, offs
, 0, err
) == -1) {
396 nbdkit_error ("xz: read: could not read block header byte: error %d", *err
);
401 if (header
[0] == '\0') {
402 nbdkit_error ("xz: read: unexpected invalid block in file, header[0] = 0");
407 block
.check
= iter
.stream
.flags
->check
;
408 block
.filters
= filters
;
409 block
.header_size
= lzma_block_header_size_decode (header
[0]);
411 /* Now read and decode the block header. */
412 if (next
->pread (next
, &header
[1], block
.header_size
-1, offs
,
414 nbdkit_error ("xz: read: could not read block of compressed data: "
418 offs
+= block
.header_size
- 1;
420 r
= lzma_block_header_decode (&block
, NULL
, header
);
422 nbdkit_error ("invalid block header (error %d)", r
);
426 /* What this actually does is it checks that the block header
429 r
= lzma_block_compressed_size (&block
, iter
.block
.unpadded_size
);
431 nbdkit_error ("cannot calculate compressed size (error %d)", r
);
435 /* Read the block data. */
436 r
= lzma_block_decoder (&strm
, &block
);
438 nbdkit_error ("invalid block (error %d)", r
);
442 data
= malloc (*size_rtn
);
444 nbdkit_error ("malloc (%" PRIu64
" bytes): %m\n"
445 "NOTE: If this error occurs, you need to recompress your "
446 "xz files with a smaller block size. "
447 "Use: 'xz --block-size=16777216 ...'.",
452 buf
= malloc (bufsize
);
454 nbdkit_error ("malloc: %m");
460 strm
.next_out
= (uint8_t *) data
;
461 strm
.avail_out
= block
.uncompressed_size
;
463 if (strm
.avail_in
== 0) {
464 strm
.avail_in
= bufsize
;
465 if (offs
+ strm
.avail_in
> size
)
466 strm
.avail_in
= size
- offs
;
467 if (strm
.avail_in
> 0) {
469 if (next
->pread (next
, buf
, strm
.avail_in
, offs
, 0, err
) == -1) {
470 nbdkit_error ("xz: read: error %d", *err
);
473 offs
+= strm
.avail_in
;
477 r
= lzma_code (&strm
, LZMA_RUN
);
478 } while (r
== LZMA_OK
);
480 if (r
!= LZMA_OK
&& r
!= LZMA_STREAM_END
) {
481 nbdkit_error ("could not parse block data (error %d)", r
);
487 for (i
= 0; filters
[i
].id
!= LZMA_VLI_UNKNOWN
; ++i
)
488 free (filters
[i
].options
);
495 for (i
= 0; filters
[i
].id
!= LZMA_VLI_UNKNOWN
; ++i
)
496 free (filters
[i
].options
);