Update Red Hat Copyright Notices
[nbdkit.git] / filters / xz / xzfile.c
blob6bd641a92a5968675224c9bafd6890ef355f73f2
1 /* nbdkit
2 * Copyright Red Hat
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * * Neither the name of Red Hat nor the names of its contributors may be
16 * used to endorse or promote products derived from this software without
17 * specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
22 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
23 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
26 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
33 /* liblzma is a complex interface, so abstract it here. */
35 #include <config.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <stdbool.h>
40 #include <string.h>
41 #include <stdint.h>
42 #include <inttypes.h>
43 #include <unistd.h>
44 #include <fcntl.h>
45 #include <sys/types.h>
47 #include <nbdkit-filter.h>
49 #include <lzma.h>
51 #include "cleanup.h"
53 #include "xzfile.h"
/* Magic bytes expected at offset 0 of the underlying file, and how many
 * of them check_header_magic compares.
 */
#define XZ_HEADER_MAGIC     "\xfd" "7zXZ\0"
#define XZ_HEADER_MAGIC_LEN 6

/* Footer magic and its length.  NOTE(review): neither macro is
 * referenced by the code visible in this file.
 */
#define XZ_FOOTER_MAGIC     "YZ"
#define XZ_FOOTER_MAGIC_LEN 2
60 struct xzfile {
61 lzma_index *idx;
62 size_t nr_streams;
63 size_t nr_blocks;
64 uint64_t max_uncompressed_block_size;
67 static bool check_header_magic (nbdkit_next *next);
68 static lzma_index *parse_indexes (nbdkit_next *next, size_t *);
69 static int iter_indexes (lzma_index *idx, size_t *, uint64_t *);
71 xzfile *
72 xzfile_open (nbdkit_next *next)
74 xzfile *xz;
75 uint64_t size;
77 xz = malloc (sizeof *xz);
78 if (xz == NULL) {
79 nbdkit_error ("malloc: %m");
80 return NULL;
83 /* Check file magic. */
84 if (!check_header_magic (next)) {
85 nbdkit_error ("xz: not an xz file");
86 goto err1;
89 /* Read and parse the indexes. */
90 xz->idx = parse_indexes (next, &xz->nr_streams);
91 if (xz->idx == NULL)
92 goto err1;
94 /* Iterate over indexes to find the number of and largest block. */
95 if (iter_indexes (xz->idx,
96 &xz->nr_blocks, &xz->max_uncompressed_block_size) == -1)
97 goto err1;
99 size = lzma_index_uncompressed_size (xz->idx);
100 nbdkit_debug ("xz: size %" PRIu64 " bytes (%.1fM)",
101 size, size / 1024.0 / 1024.0);
102 nbdkit_debug ("xz: %zu streams, %zu blocks", xz->nr_streams, xz->nr_blocks);
103 nbdkit_debug ("xz: maximum uncompressed block size %" PRIu64 " bytes (%.1fM)",
104 xz->max_uncompressed_block_size,
105 xz->max_uncompressed_block_size / 1024.0 / 1024.0);
107 return xz;
109 err1:
110 free (xz);
111 return NULL;
114 static bool
115 check_header_magic (nbdkit_next *next)
117 char buf[XZ_HEADER_MAGIC_LEN];
118 int err;
120 if (next->get_size (next) < XZ_HEADER_MAGIC_LEN) {
121 nbdkit_error ("xz: file too short");
122 return false;
124 if (next->pread (next, buf, XZ_HEADER_MAGIC_LEN, 0, 0, &err) == -1) {
125 nbdkit_error ("xz: could not read header magic: error %d", err);
126 return false;
128 if (memcmp (buf, XZ_HEADER_MAGIC, XZ_HEADER_MAGIC_LEN) != 0)
129 return false;
130 return true;
133 /* For explanation of this function, see src/xz/list.c:parse_indexes
134 * in the xz sources.
136 static lzma_index *
137 parse_indexes (nbdkit_next *next,
138 size_t *nr_streams)
140 lzma_ret r;
141 int64_t size, pos, index_size, offs;
142 int err;
143 uint8_t footer[LZMA_STREAM_HEADER_SIZE];
144 uint8_t header[LZMA_STREAM_HEADER_SIZE];
145 lzma_stream_flags footer_flags;
146 lzma_stream_flags header_flags;
147 lzma_stream strm = LZMA_STREAM_INIT;
148 lzma_index *combined_index = NULL;
149 lzma_index *this_index = NULL;
150 lzma_vli stream_padding = 0;
152 *nr_streams = 0;
154 /* Check file size is a multiple of 4 bytes. */
155 pos = size = next->get_size (next);
156 if (pos == -1) {
157 nbdkit_error ("xz: get_size: %m");
158 goto err;
160 if ((pos & 3) != 0) {
161 nbdkit_error ("xz: not an xz file: size is not a multiple of 4 bytes");
162 goto err;
165 /* Jump backwards through the file identifying each stream. */
166 while (pos > 0) {
167 nbdkit_debug ("looping through streams: pos = %" PRIi64, pos);
169 if (pos < LZMA_STREAM_HEADER_SIZE) {
170 nbdkit_error ("xz: corrupted file at %" PRIi64, pos);
171 goto err;
174 if (next->pread (next, footer, LZMA_STREAM_HEADER_SIZE,
175 pos - LZMA_STREAM_HEADER_SIZE, 0, &err) == -1) {
176 nbdkit_error ("xz: read stream footer: error %d", err);
177 goto err;
179 /* Skip stream padding. */
180 if (footer[8] == 0 && footer[9] == 0 &&
181 footer[10] == 0 && footer[11] == 0) {
182 stream_padding += 4;
183 pos -= 4;
184 continue;
187 pos -= LZMA_STREAM_HEADER_SIZE;
188 (*nr_streams)++;
190 nbdkit_debug ("decode stream footer at pos = %" PRIi64, pos);
192 /* Does the stream footer look reasonable? */
193 r = lzma_stream_footer_decode (&footer_flags, footer);
194 if (r != LZMA_OK) {
195 nbdkit_error ("xz: invalid stream footer (error %d)", r);
196 goto err;
198 nbdkit_debug ("backward_size = %" PRIu64,
199 (uint64_t) footer_flags.backward_size);
200 index_size = footer_flags.backward_size;
201 if (pos < index_size + LZMA_STREAM_HEADER_SIZE) {
202 nbdkit_error ("xz: invalid stream footer");
203 goto err;
206 pos -= index_size;
207 nbdkit_debug ("decode index at pos = %" PRIi64, pos);
209 /* Decode the index. */
210 r = lzma_index_decoder (&strm, &this_index, UINT64_MAX);
211 if (r != LZMA_OK) {
212 nbdkit_error ("xz: invalid stream index (error %d)", r);
213 goto err;
216 offs = pos;
217 do {
218 uint8_t buf[BUFSIZ];
220 strm.avail_in = index_size;
221 if (strm.avail_in > BUFSIZ)
222 strm.avail_in = BUFSIZ;
223 if (pos + strm.avail_in > size)
224 strm.avail_in = size - pos;
226 if (next->pread (next, buf, strm.avail_in, offs, 0, &err) == -1) {
227 nbdkit_error ("xz: read index: error %d", err);
228 goto err;
230 offs += strm.avail_in;
231 index_size -= strm.avail_in;
233 strm.next_in = buf;
234 r = lzma_code (&strm, LZMA_RUN);
235 } while (r == LZMA_OK);
237 if (r != LZMA_STREAM_END) {
238 nbdkit_error ("xz: could not parse index (error %d)", r);
239 goto err;
242 pos -= lzma_index_total_size (this_index) + LZMA_STREAM_HEADER_SIZE;
244 nbdkit_debug ("decode stream header at pos = %" PRIi64, pos);
246 /* Read and decode the stream header. */
247 if (next->pread (next, header, LZMA_STREAM_HEADER_SIZE, pos, 0,
248 &err) == -1) {
249 nbdkit_error ("xz: read stream header: error %d", err);
250 goto err;
253 r = lzma_stream_header_decode (&header_flags, header);
254 if (r != LZMA_OK) {
255 nbdkit_error ("xz: invalid stream header (error %d)", r);
256 goto err;
259 /* Header and footer of the stream should be equal. */
260 r = lzma_stream_flags_compare (&header_flags, &footer_flags);
261 if (r != LZMA_OK) {
262 nbdkit_error ("xz: header and footer of stream are not equal (error %d)",
264 goto err;
267 /* Store the decoded stream flags in this_index. */
268 r = lzma_index_stream_flags (this_index, &footer_flags);
269 if (r != LZMA_OK) {
270 nbdkit_error ("xz: cannot read stream_flags from index (error %d)", r);
271 goto err;
274 /* Store the amount of stream padding so far. Needed to calculate
275 * compressed offsets correctly in multi-stream files.
277 r = lzma_index_stream_padding (this_index, stream_padding);
278 if (r != LZMA_OK) {
279 nbdkit_error ("xz: cannot set stream_padding in index (error %d)", r);
280 goto err;
283 if (combined_index != NULL) {
284 r = lzma_index_cat (this_index, combined_index, NULL);
285 if (r != LZMA_OK) {
286 nbdkit_error ("xz: cannot combine indexes");
287 goto err;
291 combined_index = this_index;
292 this_index = NULL;
295 lzma_end (&strm);
297 return combined_index;
299 err:
300 lzma_end (&strm);
301 lzma_index_end (this_index, NULL);
302 lzma_index_end (combined_index, NULL);
303 return NULL;
306 /* Iterate over the indexes to find the number of blocks and
307 * the largest block.
309 static int
310 iter_indexes (lzma_index *idx,
311 size_t *nr_blocks, uint64_t *max_uncompressed_block_size)
313 lzma_index_iter iter;
315 *nr_blocks = 0;
316 *max_uncompressed_block_size = 0;
318 lzma_index_iter_init (&iter, idx);
319 while (!lzma_index_iter_next (&iter, LZMA_INDEX_ITER_NONEMPTY_BLOCK)) {
320 if (iter.block.uncompressed_size > *max_uncompressed_block_size)
321 *max_uncompressed_block_size = iter.block.uncompressed_size;
322 (*nr_blocks)++;
325 return 0;
328 void
329 xzfile_close (xzfile *xz)
331 if (xz) {
332 lzma_index_end (xz->idx, NULL);
333 free (xz);
337 uint64_t
338 xzfile_max_uncompressed_block_size (xzfile *xz)
340 return xz->max_uncompressed_block_size;
343 uint64_t
344 xzfile_get_size (xzfile *xz)
346 return lzma_index_uncompressed_size (xz->idx);
349 char *
350 xzfile_read_block (xzfile *xz,
351 nbdkit_next *next,
352 uint32_t flags, int *err,
353 uint64_t offset,
354 uint64_t *start_rtn, uint64_t *size_rtn)
356 int64_t offs, size;
357 lzma_index_iter iter;
358 uint8_t header[LZMA_BLOCK_HEADER_SIZE_MAX];
359 lzma_block block;
360 lzma_filter filters[LZMA_FILTERS_MAX + 1];
361 lzma_ret r;
362 lzma_stream strm = LZMA_STREAM_INIT;
363 const size_t bufsize = 1024 * 1024;
364 CLEANUP_FREE unsigned char *buf = NULL;
365 char *data = NULL;
366 size_t i;
368 /* Read the total size of the underlying disk, so we don't
369 * read over the end.
371 size = next->get_size (next);
372 if (size == -1) {
373 nbdkit_error ("xz: get_size: %m");
374 return NULL;
377 /* Locate the block containing the uncompressed offset. */
378 lzma_index_iter_init (&iter, xz->idx);
379 if (lzma_index_iter_locate (&iter, offset)) {
380 nbdkit_error ("cannot find offset %" PRIu64 " in the xz file", offset);
381 return NULL;
384 *start_rtn = iter.block.uncompressed_file_offset;
385 *size_rtn = iter.block.uncompressed_size;
387 nbdkit_debug ("seek: block number %d at file offset %" PRIu64,
388 (int) iter.block.number_in_file,
389 (uint64_t) iter.block.compressed_file_offset);
391 /* Read the block header. Start by reading a single byte which
392 * tell us how big the block header is.
394 offs = iter.block.compressed_file_offset;
395 if (next->pread (next, header, 1, offs, 0, err) == -1) {
396 nbdkit_error ("xz: read: could not read block header byte: error %d", *err);
397 return NULL;
399 offs++;
401 if (header[0] == '\0') {
402 nbdkit_error ("xz: read: unexpected invalid block in file, header[0] = 0");
403 return NULL;
406 block.version = 0;
407 block.check = iter.stream.flags->check;
408 block.filters = filters;
409 block.header_size = lzma_block_header_size_decode (header[0]);
411 /* Now read and decode the block header. */
412 if (next->pread (next, &header[1], block.header_size-1, offs,
413 0, err) == -1) {
414 nbdkit_error ("xz: read: could not read block of compressed data: "
415 "error %d", *err);
416 return NULL;
418 offs += block.header_size - 1;
420 r = lzma_block_header_decode (&block, NULL, header);
421 if (r != LZMA_OK) {
422 nbdkit_error ("invalid block header (error %d)", r);
423 return NULL;
426 /* What this actually does is it checks that the block header
427 * matches the index.
429 r = lzma_block_compressed_size (&block, iter.block.unpadded_size);
430 if (r != LZMA_OK) {
431 nbdkit_error ("cannot calculate compressed size (error %d)", r);
432 goto err1;
435 /* Read the block data. */
436 r = lzma_block_decoder (&strm, &block);
437 if (r != LZMA_OK) {
438 nbdkit_error ("invalid block (error %d)", r);
439 goto err1;
442 data = malloc (*size_rtn);
443 if (data == NULL) {
444 nbdkit_error ("malloc (%" PRIu64 " bytes): %m\n"
445 "NOTE: If this error occurs, you need to recompress your "
446 "xz files with a smaller block size. "
447 "Use: 'xz --block-size=16777216 ...'.",
448 *size_rtn);
449 goto err2;
452 buf = malloc (bufsize);
453 if (buf == NULL) {
454 nbdkit_error ("malloc: %m");
455 goto err2;
458 strm.next_in = NULL;
459 strm.avail_in = 0;
460 strm.next_out = (uint8_t *) data;
461 strm.avail_out = block.uncompressed_size;
462 do {
463 if (strm.avail_in == 0) {
464 strm.avail_in = bufsize;
465 if (offs + strm.avail_in > size)
466 strm.avail_in = size - offs;
467 if (strm.avail_in > 0) {
468 strm.next_in = buf;
469 if (next->pread (next, buf, strm.avail_in, offs, 0, err) == -1) {
470 nbdkit_error ("xz: read: error %d", *err);
471 goto err2;
473 offs += strm.avail_in;
477 r = lzma_code (&strm, LZMA_RUN);
478 } while (r == LZMA_OK);
480 if (r != LZMA_OK && r != LZMA_STREAM_END) {
481 nbdkit_error ("could not parse block data (error %d)", r);
482 goto err2;
485 lzma_end (&strm);
487 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
488 free (filters[i].options);
490 return data;
492 err2:
493 lzma_end (&strm);
494 err1:
495 for (i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
496 free (filters[i].options);
498 free (data);
500 return NULL;