plugins/xz: Fix debug vs error mixup.
[nbdkit/ericb.git] / plugins / xz / xzfile.c
blobfcc2937eb1b90686df63bc7b0eab27c50f6182a3
1 /* nbdkit
2 * Copyright (C) 2013 Red Hat Inc.
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met:
9 * * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Red Hat nor the names of its contributors may be
17 * used to endorse or promote products derived from this software without
18 * specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
24 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
28 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
34 /* liblzma is a complex interface, so abstract it here. */
36 #include <config.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdint.h>
42 #include <inttypes.h>
43 #include <unistd.h>
44 #include <fcntl.h>
45 #include <sys/types.h>
47 #include <nbdkit-plugin.h>
49 #include <lzma.h>
51 #include "xzfile.h"
/* Magic byte sequences used to recognise an xz stream.  The header
 * literal is split in two so that the '7' is not absorbed into the
 * preceding \x escape.  Each *_LEN is the number of magic bytes, not
 * counting the string literal's implicit terminator.
 */
#define XZ_HEADER_MAGIC      "\xfd" "7zXZ\0"
#define XZ_HEADER_MAGIC_LEN  6
#define XZ_FOOTER_MAGIC      "YZ"
#define XZ_FOOTER_MAGIC_LEN  2
58 struct xzfile {
59 int fd;
60 lzma_index *idx;
61 size_t nr_streams;
62 size_t nr_blocks;
63 uint64_t max_uncompressed_block_size;
66 static int check_header_magic (int fd);
67 static lzma_index *parse_indexes (const char *filename, int fd, size_t *);
68 static int iter_indexes (lzma_index *idx, size_t *, uint64_t *);
70 xzfile *
71 xzfile_open (const char *filename)
73 xzfile *xz;
74 uint64_t size;
76 xz = malloc (sizeof *xz);
77 if (xz == NULL) {
78 nbdkit_error ("malloc: %m");
79 return NULL;
82 /* Open the file. */
83 xz->fd = open (filename, O_RDONLY|O_CLOEXEC);
84 if (xz->fd == -1) {
85 nbdkit_error ("%s: %m", filename);
86 goto err1;
89 /* Check file magic. */
90 if (!check_header_magic (xz->fd)) {
91 nbdkit_error ("%s: not an xz file", filename);
92 goto err2;
95 /* Read and parse the indexes. */
96 xz->idx = parse_indexes (filename, xz->fd, &xz->nr_streams);
97 if (xz->idx == NULL)
98 goto err2;
100 /* Iterate over indexes to find the number of and largest block. */
101 if (iter_indexes (xz->idx,
102 &xz->nr_blocks, &xz->max_uncompressed_block_size) == -1)
103 goto err2;
105 size = lzma_index_uncompressed_size (xz->idx);
106 nbdkit_debug ("%s: size %" PRIu64 " bytes (%.1fM)",
107 filename, size, size / 1024.0 / 1024.0);
108 nbdkit_debug ("%s: %zu streams, %zu blocks", filename,
109 xz->nr_streams, xz->nr_blocks);
110 nbdkit_debug ("%s: maximum uncompressed block size %" PRIu64 " bytes (%.1fM)",
111 filename,
112 xz->max_uncompressed_block_size,
113 xz->max_uncompressed_block_size / 1024.0 / 1024.0);
115 return xz;
117 err2:
118 close (xz->fd);
119 err1:
120 free (xz);
121 return NULL;
124 static int
125 check_header_magic (int fd)
127 char buf[XZ_HEADER_MAGIC_LEN];
129 if (lseek (fd, 0, SEEK_SET) == -1)
130 return 0;
131 if (read (fd, buf, XZ_HEADER_MAGIC_LEN) != XZ_HEADER_MAGIC_LEN)
132 return 0;
133 if (memcmp (buf, XZ_HEADER_MAGIC, XZ_HEADER_MAGIC_LEN) != 0)
134 return 0;
135 return 1;
138 /* For explanation of this function, see src/xz/list.c:parse_indexes
139 * in the xz sources.
141 static lzma_index *
142 parse_indexes (const char *filename, int fd, size_t *nr_streams)
144 lzma_ret r;
145 off_t pos, index_size;
146 uint8_t footer[LZMA_STREAM_HEADER_SIZE];
147 uint8_t header[LZMA_STREAM_HEADER_SIZE];
148 lzma_stream_flags footer_flags;
149 lzma_stream_flags header_flags;
150 lzma_stream strm = LZMA_STREAM_INIT;
151 ssize_t n;
152 lzma_index *combined_index = NULL;
153 lzma_index *this_index = NULL;
154 lzma_vli stream_padding = 0;
156 *nr_streams = 0;
158 /* Check file size is a multiple of 4 bytes. */
159 pos = lseek (fd, 0, SEEK_END);
160 if (pos == (off_t) -1) {
161 nbdkit_error ("%s: lseek: %m", filename);
162 goto err;
164 if ((pos & 3) != 0) {
165 nbdkit_error ("%s: not an xz file: size is not a multiple of 4 bytes",
166 filename);
167 goto err;
170 /* Jump backwards through the file identifying each stream. */
171 while (pos > 0) {
172 nbdkit_debug ("looping through streams: pos = %" PRIu64, (uint64_t) pos);
174 if (pos < LZMA_STREAM_HEADER_SIZE) {
175 nbdkit_error ("%s: corrupted file at %" PRIu64, filename, (uint64_t) pos);
176 goto err;
179 if (lseek (fd, -LZMA_STREAM_HEADER_SIZE, SEEK_CUR) == -1) {
180 nbdkit_error ("%s: lseek: %m", filename);
181 goto err;
183 if (read (fd, footer, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE) {
184 nbdkit_error ("%s: read stream footer: %m", filename);
185 goto err;
187 /* Skip stream padding. */
188 if (footer[8] == 0 && footer[9] == 0 &&
189 footer[10] == 0 && footer[11] == 0) {
190 stream_padding += 4;
191 pos -= 4;
192 continue;
195 pos -= LZMA_STREAM_HEADER_SIZE;
196 (*nr_streams)++;
198 nbdkit_debug ("decode stream footer at pos = %" PRIu64, (uint64_t) pos);
200 /* Does the stream footer look reasonable? */
201 r = lzma_stream_footer_decode (&footer_flags, footer);
202 if (r != LZMA_OK) {
203 nbdkit_error ("%s: invalid stream footer (error %d)", filename, r);
204 goto err;
206 nbdkit_debug ("backward_size = %" PRIu64,
207 (uint64_t) footer_flags.backward_size);
208 index_size = footer_flags.backward_size;
209 if (pos < index_size + LZMA_STREAM_HEADER_SIZE) {
210 nbdkit_error ("%s: invalid stream footer", filename);
211 goto err;
214 pos -= index_size;
215 nbdkit_debug ("decode index at pos = %" PRIu64, (uint64_t) pos);
217 /* Seek backwards to the index of this stream. */
218 if (lseek (fd, pos, SEEK_SET) == -1) {
219 nbdkit_error ("%s: lseek: %m", filename);
220 goto err;
223 /* Decode the index. */
224 r = lzma_index_decoder (&strm, &this_index, UINT64_MAX);
225 if (r != LZMA_OK) {
226 nbdkit_error ("%s: invalid stream index (error %d)", filename, r);
227 goto err;
230 do {
231 uint8_t buf[BUFSIZ];
233 strm.avail_in = index_size;
234 if (strm.avail_in > BUFSIZ)
235 strm.avail_in = BUFSIZ;
237 n = read (fd, &buf, strm.avail_in);
238 if (n == -1) {
239 nbdkit_error ("read: %m");
240 goto err;
242 index_size -= strm.avail_in;
244 strm.next_in = buf;
245 r = lzma_code (&strm, LZMA_RUN);
246 } while (r == LZMA_OK);
248 if (r != LZMA_STREAM_END) {
249 nbdkit_error ("%s: could not parse index (error %d)",
250 filename, r);
251 goto err;
254 pos -= lzma_index_total_size (this_index) + LZMA_STREAM_HEADER_SIZE;
256 nbdkit_debug ("decode stream header at pos = %" PRIu64, (uint64_t) pos);
258 /* Read and decode the stream header. */
259 if (lseek (fd, pos, SEEK_SET) == -1) {
260 nbdkit_error ("%s: lseek: %m", filename);
261 goto err;
263 if (read (fd, header, LZMA_STREAM_HEADER_SIZE) != LZMA_STREAM_HEADER_SIZE) {
264 nbdkit_error ("%s: read stream header: %m", filename);
265 goto err;
268 r = lzma_stream_header_decode (&header_flags, header);
269 if (r != LZMA_OK) {
270 nbdkit_error ("%s: invalid stream header (error %d)", filename, r);
271 goto err;
274 /* Header and footer of the stream should be equal. */
275 r = lzma_stream_flags_compare (&header_flags, &footer_flags);
276 if (r != LZMA_OK) {
277 nbdkit_error ("%s: header and footer of stream are not equal (error %d)",
278 filename, r);
279 goto err;
282 /* Store the decoded stream flags in this_index. */
283 r = lzma_index_stream_flags (this_index, &footer_flags);
284 if (r != LZMA_OK) {
285 nbdkit_error ("%s: cannot read stream_flags from index (error %d)",
286 filename, r);
287 goto err;
290 /* Store the amount of stream padding so far. Needed to calculate
291 * compressed offsets correctly in multi-stream files.
293 r = lzma_index_stream_padding (this_index, stream_padding);
294 if (r != LZMA_OK) {
295 nbdkit_error ("%s: cannot set stream_padding in index (error %d)",
296 filename, r);
297 goto err;
300 if (combined_index != NULL) {
301 r = lzma_index_cat (this_index, combined_index, NULL);
302 if (r != LZMA_OK) {
303 nbdkit_error ("%s: cannot combine indexes", filename);
304 goto err;
308 combined_index = this_index;
309 this_index = NULL;
312 lzma_end (&strm);
314 return combined_index;
316 err:
317 lzma_end (&strm);
318 lzma_index_end (this_index, NULL);
319 lzma_index_end (combined_index, NULL);
320 return NULL;
323 /* Iterate over the indexes to find the number of blocks and
324 * the largest block.
326 static int
327 iter_indexes (lzma_index *idx,
328 size_t *nr_blocks, uint64_t *max_uncompressed_block_size)
330 lzma_index_iter iter;
332 *nr_blocks = 0;
333 *max_uncompressed_block_size = 0;
335 lzma_index_iter_init (&iter, idx);
336 while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) {
337 if (iter.block.uncompressed_size > *max_uncompressed_block_size)
338 *max_uncompressed_block_size = iter.block.uncompressed_size;
339 (*nr_blocks)++;
342 return 0;
345 void
346 xzfile_close (xzfile *xz)
348 lzma_index_end (xz->idx, NULL);
349 close (xz->fd);
350 free (xz);
353 uint64_t
354 xzfile_max_uncompressed_block_size (xzfile *xz)
356 return xz->max_uncompressed_block_size;
359 uint64_t
360 xzfile_get_size (xzfile *xz)
362 return lzma_index_uncompressed_size (xz->idx);
365 char *
366 xzfile_read_block (xzfile *xz, uint64_t offset,
367 uint64_t *start_rtn, uint64_t *size_rtn)
369 lzma_index_iter iter;
370 uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX];
371 lzma_block block;
372 lzma_filter filters[LZMA_FILTERS_MAX + 1];
373 lzma_ret r;
374 lzma_stream strm = LZMA_STREAM_INIT;
375 char *data;
376 ssize_t n;
377 size_t i;
379 /* Locate the block containing the uncompressed offset. */
380 lzma_index_iter_init (&iter, xz->idx);
381 if (lzma_index_iter_locate (&iter, offset)) {
382 nbdkit_error ("cannot find offset %" PRIu64 " in the xz file", offset);
383 return NULL;
386 *start_rtn = iter.block.uncompressed_file_offset;
387 *size_rtn = iter.block.uncompressed_size;
389 nbdkit_debug ("seek: block number %d at file offset %" PRIu64,
390 (int) iter.block.number_in_file,
391 (uint64_t) iter.block.compressed_file_offset);
393 if (lseek (xz->fd, iter.block.compressed_file_offset, SEEK_SET) == -1) {
394 nbdkit_error ("lseek: %m");
395 return NULL;
398 /* Read the block header. Start by reading a single byte which
399 * tell us how big the block header is.
401 n = read (xz->fd, header, 1);
402 if (n == 0) {
403 nbdkit_error ("read: unexpected end of file reading block header byte");
404 return NULL;
406 if (n == -1) {
407 nbdkit_error ("read: %m");
408 return NULL;
411 if (header[0] == '\0') {
412 nbdkit_error ("read: unexpected invalid block in file, header[0] = 0");
413 return NULL;
416 block.version = 0;
417 block.check = iter.stream.flags->check;
418 block.filters = filters;
419 block.header_size = lzma_block_header_size_decode (header[0]);
421 /* Now read and decode the block header. */
422 n = read (xz->fd, &header[1], block.header_size-1);
423 if (n >= 0 && n != block.header_size-1) {
424 nbdkit_error ("read: unexpected end of file reading block header");
425 return NULL;
427 if (n == -1) {
428 nbdkit_error ("read: %m");
429 return NULL;
432 r = lzma_block_header_decode (&block, NULL, header);
433 if (r != LZMA_OK) {
434 nbdkit_error ("invalid block header (error %d)", r);
435 return NULL;
438 /* What this actually does is it checks that the block header
439 * matches the index.
441 r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
442 if (r != LZMA_OK) {
443 nbdkit_error ("cannot calculate compressed size (error %d)", r);
444 goto err1;
447 /* Read the block data. */
448 r = lzma_block_decoder (&strm, &block);
449 if (r != LZMA_OK) {
450 nbdkit_error ("invalid block (error %d)", r);
451 goto err1;
454 data = malloc (*size_rtn);
455 if (data == NULL) {
456 nbdkit_error ("malloc (%zu bytes): %m\n"
457 "NOTE: If this error occurs, you need to recompress your xz files with a smaller block size. Use: 'xz --block-size=16777216 ...'.",
458 *size_rtn);
459 goto err1;
462 strm.next_in = NULL;
463 strm.avail_in = 0;
464 strm.next_out = (uint8_t *) data;
465 strm.avail_out = block.uncompressed_size;
467 do {
468 uint8_t buf[BUFSIZ];
469 lzma_action action = LZMA_RUN;
471 if (strm.avail_in == 0) {
472 strm.next_in = buf;
473 n = read (xz->fd, buf, sizeof buf);
474 if (n == -1) {
475 nbdkit_error ("read: %m");
476 goto err2;
478 strm.avail_in = n;
479 if (n == 0)
480 action = LZMA_FINISH;
483 strm.avail_in = n;
484 strm.next_in = buf;
485 r = lzma_code (&strm, action);
486 } while (r == LZMA_OK);
488 if (r != LZMA_OK && r != LZMA_STREAM_END) {
489 nbdkit_error ("could not parse block data (error %d)", r);
490 goto err2;
493 lzma_end (&strm);
495 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
496 free (filters[i].options);
498 return data;
500 err2:
501 free (data);
502 lzma_end (&strm);
503 err1:
504 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
505 free (filters[i].options);
507 return NULL;