2 * Copyright (C) 2013 Red Hat Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Red Hat nor the names of its contributors may be
17 * used to endorse or promote products derived from this software without
18 * specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 /* liblzma is a complex interface, so abstract it here. */
45 #include <sys/types.h>
47 #include <nbdkit-plugin.h>
/* Magic bytes compared against the first bytes of the file by
 * check_header_magic.  The literal is written as two adjacent strings
 * so that the '7' is not absorbed into the "\xfd" hex escape.
 */
#define XZ_HEADER_MAGIC     "\xfd" "7zXZ\0"
#define XZ_HEADER_MAGIC_LEN 6

/* Footer magic bytes and their length. */
#define XZ_FOOTER_MAGIC     "YZ"
#define XZ_FOOTER_MAGIC_LEN 2
63 uint64_t max_uncompressed_block_size
;
66 static int check_header_magic (int fd
);
67 static lzma_index
*parse_indexes (const char *filename
, int fd
, size_t *);
68 static int iter_indexes (lzma_index
*idx
, size_t *, uint64_t *);
71 xzfile_open (const char *filename
)
76 xz
= malloc (sizeof *xz
);
78 nbdkit_error ("malloc: %m");
83 xz
->fd
= open (filename
, O_RDONLY
|O_CLOEXEC
);
85 nbdkit_error ("%s: %m", filename
);
89 /* Check file magic. */
90 if (!check_header_magic (xz
->fd
)) {
91 nbdkit_error ("%s: not an xz file", filename
);
95 /* Read and parse the indexes. */
96 xz
->idx
= parse_indexes (filename
, xz
->fd
, &xz
->nr_streams
);
100 /* Iterate over indexes to find the number of and largest block. */
101 if (iter_indexes (xz
->idx
,
102 &xz
->nr_blocks
, &xz
->max_uncompressed_block_size
) == -1)
105 size
= lzma_index_uncompressed_size (xz
->idx
);
106 nbdkit_debug ("%s: size %" PRIu64
" bytes (%.1fM)",
107 filename
, size
, size
/ 1024.0 / 1024.0);
108 nbdkit_debug ("%s: %zu streams, %zu blocks", filename
,
109 xz
->nr_streams
, xz
->nr_blocks
);
110 nbdkit_debug ("%s: maximum uncompressed block size %" PRIu64
" bytes (%.1fM)",
112 xz
->max_uncompressed_block_size
,
113 xz
->max_uncompressed_block_size
/ 1024.0 / 1024.0);
125 check_header_magic (int fd
)
127 char buf
[XZ_HEADER_MAGIC_LEN
];
129 if (lseek (fd
, 0, SEEK_SET
) == -1)
131 if (read (fd
, buf
, XZ_HEADER_MAGIC_LEN
) != XZ_HEADER_MAGIC_LEN
)
133 if (memcmp (buf
, XZ_HEADER_MAGIC
, XZ_HEADER_MAGIC_LEN
) != 0)
138 /* For explanation of this function, see src/xz/list.c:parse_indexes
142 parse_indexes (const char *filename
, int fd
, size_t *nr_streams
)
145 off_t pos
, index_size
;
146 uint8_t footer
[LZMA_STREAM_HEADER_SIZE
];
147 uint8_t header
[LZMA_STREAM_HEADER_SIZE
];
148 lzma_stream_flags footer_flags
;
149 lzma_stream_flags header_flags
;
150 lzma_stream strm
= LZMA_STREAM_INIT
;
152 lzma_index
*combined_index
= NULL
;
153 lzma_index
*this_index
= NULL
;
154 lzma_vli stream_padding
= 0;
158 /* Check file size is a multiple of 4 bytes. */
159 pos
= lseek (fd
, 0, SEEK_END
);
160 if (pos
== (off_t
) -1) {
161 nbdkit_error ("%s: lseek: %m", filename
);
164 if ((pos
& 3) != 0) {
165 nbdkit_error ("%s: not an xz file: size is not a multiple of 4 bytes",
170 /* Jump backwards through the file identifying each stream. */
172 nbdkit_debug ("looping through streams: pos = %" PRIu64
, (uint64_t) pos
);
174 if (pos
< LZMA_STREAM_HEADER_SIZE
) {
175 nbdkit_error ("%s: corrupted file at %" PRIu64
, filename
, (uint64_t) pos
);
179 if (lseek (fd
, -LZMA_STREAM_HEADER_SIZE
, SEEK_CUR
) == -1) {
180 nbdkit_error ("%s: lseek: %m", filename
);
183 if (read (fd
, footer
, LZMA_STREAM_HEADER_SIZE
) != LZMA_STREAM_HEADER_SIZE
) {
184 nbdkit_error ("%s: read stream footer: %m", filename
);
187 /* Skip stream padding. */
188 if (footer
[8] == 0 && footer
[9] == 0 &&
189 footer
[10] == 0 && footer
[11] == 0) {
195 pos
-= LZMA_STREAM_HEADER_SIZE
;
198 nbdkit_debug ("decode stream footer at pos = %" PRIu64
, (uint64_t) pos
);
200 /* Does the stream footer look reasonable? */
201 r
= lzma_stream_footer_decode (&footer_flags
, footer
);
203 nbdkit_error ("%s: invalid stream footer (error %d)", filename
, r
);
206 nbdkit_debug ("backward_size = %" PRIu64
,
207 (uint64_t) footer_flags
.backward_size
);
208 index_size
= footer_flags
.backward_size
;
209 if (pos
< index_size
+ LZMA_STREAM_HEADER_SIZE
) {
210 nbdkit_error ("%s: invalid stream footer", filename
);
215 nbdkit_debug ("decode index at pos = %" PRIu64
, (uint64_t) pos
);
217 /* Seek backwards to the index of this stream. */
218 if (lseek (fd
, pos
, SEEK_SET
) == -1) {
219 nbdkit_error ("%s: lseek: %m", filename
);
223 /* Decode the index. */
224 r
= lzma_index_decoder (&strm
, &this_index
, UINT64_MAX
);
226 nbdkit_error ("%s: invalid stream index (error %d)", filename
, r
);
233 strm
.avail_in
= index_size
;
234 if (strm
.avail_in
> BUFSIZ
)
235 strm
.avail_in
= BUFSIZ
;
237 n
= read (fd
, &buf
, strm
.avail_in
);
239 nbdkit_error ("read: %m");
242 index_size
-= strm
.avail_in
;
245 r
= lzma_code (&strm
, LZMA_RUN
);
246 } while (r
== LZMA_OK
);
248 if (r
!= LZMA_STREAM_END
) {
249 nbdkit_error ("%s: could not parse index (error %d)",
254 pos
-= lzma_index_total_size (this_index
) + LZMA_STREAM_HEADER_SIZE
;
256 nbdkit_debug ("decode stream header at pos = %" PRIu64
, (uint64_t) pos
);
258 /* Read and decode the stream header. */
259 if (lseek (fd
, pos
, SEEK_SET
) == -1) {
260 nbdkit_error ("%s: lseek: %m", filename
);
263 if (read (fd
, header
, LZMA_STREAM_HEADER_SIZE
) != LZMA_STREAM_HEADER_SIZE
) {
264 nbdkit_error ("%s: read stream header: %m", filename
);
268 r
= lzma_stream_header_decode (&header_flags
, header
);
270 nbdkit_error ("%s: invalid stream header (error %d)", filename
, r
);
274 /* Header and footer of the stream should be equal. */
275 r
= lzma_stream_flags_compare (&header_flags
, &footer_flags
);
277 nbdkit_error ("%s: header and footer of stream are not equal (error %d)",
282 /* Store the decoded stream flags in this_index. */
283 r
= lzma_index_stream_flags (this_index
, &footer_flags
);
285 nbdkit_error ("%s: cannot read stream_flags from index (error %d)",
290 /* Store the amount of stream padding so far. Needed to calculate
291 * compressed offsets correctly in multi-stream files.
293 r
= lzma_index_stream_padding (this_index
, stream_padding
);
295 nbdkit_error ("%s: cannot set stream_padding in index (error %d)",
300 if (combined_index
!= NULL
) {
301 r
= lzma_index_cat (this_index
, combined_index
, NULL
);
303 nbdkit_error ("%s: cannot combine indexes", filename
);
308 combined_index
= this_index
;
314 return combined_index
;
318 lzma_index_end (this_index
, NULL
);
319 lzma_index_end (combined_index
, NULL
);
323 /* Iterate over the indexes to find the number of blocks and
327 iter_indexes (lzma_index
*idx
,
328 size_t *nr_blocks
, uint64_t *max_uncompressed_block_size
)
330 lzma_index_iter iter
;
333 *max_uncompressed_block_size
= 0;
335 lzma_index_iter_init (&iter
, idx
);
336 while (!lzma_index_iter_next (&iter
, LZMA_INDEX_ITER_NONEMPTY_BLOCK
)) {
337 if (iter
.block
.uncompressed_size
> *max_uncompressed_block_size
)
338 *max_uncompressed_block_size
= iter
.block
.uncompressed_size
;
346 xzfile_close (xzfile
*xz
)
348 lzma_index_end (xz
->idx
, NULL
);
354 xzfile_max_uncompressed_block_size (xzfile
*xz
)
356 return xz
->max_uncompressed_block_size
;
360 xzfile_get_size (xzfile
*xz
)
362 return lzma_index_uncompressed_size (xz
->idx
);
366 xzfile_read_block (xzfile
*xz
, uint64_t offset
,
367 uint64_t *start_rtn
, uint64_t *size_rtn
)
369 lzma_index_iter iter
;
370 uint8_t header
[LZMA_BLOCK_HEADER_SIZE_MAX
];
372 lzma_filter filters
[LZMA_FILTERS_MAX
+ 1];
374 lzma_stream strm
= LZMA_STREAM_INIT
;
379 /* Locate the block containing the uncompressed offset. */
380 lzma_index_iter_init (&iter
, xz
->idx
);
381 if (lzma_index_iter_locate (&iter
, offset
)) {
382 nbdkit_error ("cannot find offset %" PRIu64
" in the xz file", offset
);
386 *start_rtn
= iter
.block
.uncompressed_file_offset
;
387 *size_rtn
= iter
.block
.uncompressed_size
;
389 nbdkit_debug ("seek: block number %d at file offset %" PRIu64
,
390 (int) iter
.block
.number_in_file
,
391 (uint64_t) iter
.block
.compressed_file_offset
);
393 if (lseek (xz
->fd
, iter
.block
.compressed_file_offset
, SEEK_SET
) == -1) {
394 nbdkit_error ("lseek: %m");
398 /* Read the block header. Start by reading a single byte which
399 * tell us how big the block header is.
401 n
= read (xz
->fd
, header
, 1);
403 nbdkit_error ("read: unexpected end of file reading block header byte");
407 nbdkit_error ("read: %m");
411 if (header
[0] == '\0') {
412 nbdkit_error ("read: unexpected invalid block in file, header[0] = 0");
417 block
.check
= iter
.stream
.flags
->check
;
418 block
.filters
= filters
;
419 block
.header_size
= lzma_block_header_size_decode (header
[0]);
421 /* Now read and decode the block header. */
422 n
= read (xz
->fd
, &header
[1], block
.header_size
-1);
423 if (n
>= 0 && n
!= block
.header_size
-1) {
424 nbdkit_error ("read: unexpected end of file reading block header");
428 nbdkit_error ("read: %m");
432 r
= lzma_block_header_decode (&block
, NULL
, header
);
434 nbdkit_error ("invalid block header (error %d)", r
);
438 /* What this actually does is it checks that the block header
441 r
= lzma_block_compressed_size (&block
, iter
.block
.unpadded_size
);
443 nbdkit_error ("cannot calculate compressed size (error %d)", r
);
447 /* Read the block data. */
448 r
= lzma_block_decoder (&strm
, &block
);
450 nbdkit_error ("invalid block (error %d)", r
);
454 data
= malloc (*size_rtn
);
456 nbdkit_error ("malloc (%zu bytes): %m\n"
457 "NOTE: If this error occurs, you need to recompress your xz files with a smaller block size. Use: 'xz --block-size=16777216 ...'.",
464 strm
.next_out
= (uint8_t *) data
;
465 strm
.avail_out
= block
.uncompressed_size
;
469 lzma_action action
= LZMA_RUN
;
471 if (strm
.avail_in
== 0) {
473 n
= read (xz
->fd
, buf
, sizeof buf
);
475 nbdkit_error ("read: %m");
480 action
= LZMA_FINISH
;
485 r
= lzma_code (&strm
, action
);
486 } while (r
== LZMA_OK
);
488 if (r
!= LZMA_OK
&& r
!= LZMA_STREAM_END
) {
489 nbdkit_error ("could not parse block data (error %d)", r
);
495 for (i
= 0; filters
[i
].id
!= LZMA_VLI_UNKNOWN
; ++i
)
496 free (filters
[i
].options
);
504 for (i
= 0; filters
[i
].id
!= LZMA_VLI_UNKNOWN
; ++i
)
505 free (filters
[i
].options
);