2 * Copyright (c) 2008, Charles Wilson
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * A copy of the GNU General Public License can be found at
12 * Written by Charles Wilson <cygwin@cygwin.com>
14 * Portions of bid_xz() and bid_lzma() adapted from the libarchive
15 * archive_read_support_compression_xz.c functions xz_bidder_bid()
16 * and lzma_bidder_bid(), which are under a BSD license (reproduced
20 #include "compress_xz.h"
21 #include "LogSingleton.h"
/*
 * Decode a 32-bit little-endian unsigned integer from the four bytes
 * starting at pp.  Only byte loads are performed, so pp need not be
 * aligned.
 */
static inline uint32_t
le32dec(const void *pp)
{
  unsigned char const *p = (unsigned char const *)pp;

  /* Widen every byte to uint32_t before shifting.  An unsigned char is
   * promoted to (signed) int, so a bare p[3] << 24 would shift into the
   * sign bit whenever the top byte is >= 0x80. */
  return (((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16)
	  | ((uint32_t)p[1] << 8) | (uint32_t)p[0]);
}
/*
 * Decode a 64-bit little-endian unsigned integer from the eight bytes
 * starting at pp.
 */
static inline uint64_t
le64dec(const void *pp)
{
  const unsigned char *bytes = (const unsigned char *)pp;
  uint64_t value = 0;

  /* Fold from the most significant byte (highest address) downwards. */
  for (int idx = 7; idx >= 0; --idx)
    value = (value << 8) | bytes[idx];

  return value;
}
44 * Predicate: the stream is open for read.
// Constructor taking the underlying io_stream `parent`.
// compression_type is initialised to COMPRESSION_UNKNOWN, and a null or
// already-errored parent is detected up front (the handling of that case
// is not visible in this excerpt).
46 compress_xz::compress_xz (io_stream
* parent
)
52 compression_type (COMPRESSION_UNKNOWN
)
55 if (!parent
|| parent
->error())
// Allocate the decoder state together with its input and output staging
// buffers, then prime the bookkeeping so that no input is pending and the
// whole output block is available to the decoder.
67 compress_xz::create ()
69 unsigned char * out_block
= NULL
;
70 unsigned char * in_block
= NULL
;
// NOTE(review): calloc is called as (size, count); the conventional order
// is (count, size). The number of bytes allocated is the same either way.
72 state
= (struct private_data
*)calloc(sizeof(*state
), 1);
73 out_block
= (unsigned char *)malloc(out_block_size
);
74 in_block
= (unsigned char *)malloc(in_block_size
);
// A failure of any one of the three allocations is handled as one case.
75 if (state
== NULL
|| out_block
== NULL
|| in_block
== NULL
)
// Clear the embedded lzma stream before a decoder is set up on it.
84 memset(&(state
->stream
), 0x00, sizeof(state
->stream
));
// Hand the freshly allocated buffers and their sizes over to the state.
85 state
->out_block_size
= out_block_size
;
86 state
->out_block
= out_block
;
87 state
->in_block_size
= in_block_size
;
88 state
->in_block
= in_block
;
// The read cursor starts at the beginning of the output block.
89 state
->out_p
= state
->out_block
;
// No compressed input yet; the decoder may write into the entire block.
90 state
->stream
.avail_in
= 0;
91 state
->stream
.next_out
= state
->out_block
;
92 state
->stream
.avail_out
= state
->out_block_size
;
// Read up to `len` decompressed bytes into `buffer`.
// Bytes are served in this order: data stashed in peekbuf, data already
// decompressed and still waiting in out_block, and finally data freshly
// produced by lzma_code() from input pulled out of the underlying stream.
// The main path returns len - lenRemaining, i.e. the number of bytes stored.
96 compress_xz::read (void *buffer
, size_t len
)
// Guard: compression_type has to be COMPRESSION_XZ or COMPRESSION_LZMA.
98 if ( compression_type
!= COMPRESSION_XZ
99 && compression_type
!= COMPRESSION_LZMA
)
104 /* there is no recovery from a busted stream */
114 /* peekbuf is layered on top of existing buffering code */
// Hand out at most peeklen bytes from peekbuf, slide the remainder down to
// the front of peekbuf, then recurse for whatever the caller still wants.
117 ssize_t tmplen
= std::min (this->peeklen
, len
);
118 this->peeklen
-= tmplen
;
119 memcpy (buffer
, this->peekbuf
, tmplen
);
120 memmove (this->peekbuf
, this->peekbuf
+ tmplen
, sizeof(this->peekbuf
) - tmplen
);
121 ssize_t tmpread
= read (&((char *) buffer
)[tmplen
], len
- tmplen
);
123 return tmpread
+ tmplen
;
// Decompressed bytes between out_p and out_block + out_pos have not been
// delivered yet: copy them out first, then recurse for the rest.
128 if (state
->out_p
< state
->out_block
+ state
->out_pos
)
129 /* out_p - out_block < out_pos, but avoid sign/unsigned warning */
131 ssize_t tmplen
= std::min ((size_t)(state
->out_block
+ state
->out_pos
- state
->out_p
), len
);
132 memcpy (buffer
, state
->out_p
, tmplen
);
133 state
->out_p
+= tmplen
;
134 ssize_t tmpread
= read (&((char *) buffer
)[tmplen
], len
- tmplen
);
136 return tmpread
+ tmplen
;
// Main decompression path: track how much of the request is outstanding.
141 size_t lenRemaining
= len
;
142 unsigned char * bufp
= (unsigned char *)buffer
;
144 size_t avail_out
= 0;
145 size_t decompressed
= 0;
147 /* if we made it here, any existing uncompressed data in out_block
148 * has been consumed, so reset out_p and out_pos
150 state
->out_p
= state
->out_block
;
// Refill in_block from the underlying stream once all buffered input has
// been consumed.
// NOTE(review): the result of the underlying read is cast to size_t;
// confirm that a negative error return is handled by the caller-side code.
154 if (state
->in_pos
== state
->in_size
)
156 /* no compressed data ready; read some more */
157 state
->in_size
= (size_t) this->original
->read(state
->in_block
, state
->in_block_size
);
161 avail_in
= state
->in_size
- state
->in_pos
; /* will be 0 if EOF */
162 avail_out
= state
->out_block_size
- state
->out_pos
;
// Point the lzma stream at the unused parts of both staging buffers.
164 state
->stream
.next_out
= state
->out_block
+ state
->out_pos
;
165 state
->stream
.avail_out
= avail_out
;
166 state
->stream
.next_in
= state
->in_block
+ state
->in_pos
;
167 state
->stream
.avail_in
= avail_in
;
// With no input left the decoder is told to finish; otherwise it keeps
// running.
169 lzma_ret res
= lzma_code (&(state
->stream
),
170 (state
->stream
.avail_in
== 0) ? LZMA_FINISH
: LZMA_RUN
);
// Derive how much input was consumed and how much output was produced from
// the change in the avail_in and avail_out counters of the stream.
172 consumed
= avail_in
- state
->stream
.avail_in
;
173 decompressed
= avail_out
- state
->stream
.avail_out
;
175 state
->in_pos
+= consumed
;
176 state
->out_pos
+= decompressed
;
// Copy as much of the new output as the caller still wants; any surplus
// stays in out_block for the next call.
178 ssize_t tmplen
= std::min (decompressed
, lenRemaining
);
179 memcpy (bufp
, state
->out_p
, tmplen
);
180 state
->out_p
+= tmplen
;
182 lenRemaining
-= tmplen
;
183 state
->total_out
+= decompressed
;
184 state
->total_in
+= consumed
;
// Dispatch on the liblzma return code. Failures are logged and recorded in
// lasterr as either ENOMEM or EINVAL.
188 case LZMA_STREAM_END
: /* Found end of stream. */
191 case LZMA_OK
: /* Decompressor made some progress. */
194 LogPlainPrintf ("Lzma library error: Cannot allocate memory\n");
195 this->lasterr
= ENOMEM
;
197 case LZMA_MEMLIMIT_ERROR
:
198 LogPlainPrintf ("Lzma library error: Out of memory\n");
199 this->lasterr
= ENOMEM
;
201 case LZMA_FORMAT_ERROR
:
202 LogPlainPrintf ("Lzma library error: format not recognized\n");
203 this->lasterr
= EINVAL
;
205 case LZMA_OPTIONS_ERROR
:
206 LogPlainPrintf ("Lzma library error: Invalid options\n");
207 this->lasterr
= EINVAL
;
209 case LZMA_DATA_ERROR
:
210 LogPlainPrintf ("Lzma library error: Corrupted input data\n");
211 this->lasterr
= EINVAL
;
214 LogPlainPrintf ("Lzma library error: No progress is possible\n");
215 this->lasterr
= EINVAL
;
217 case LZMA_PROG_ERROR
:
218 LogPlainPrintf ("Lzma library error: Internal error\n");
219 this->lasterr
= EINVAL
;
222 LogPlainPrintf ("Lzma decompression failed: Unknown error %d\n", res
);
223 this->lasterr
= EINVAL
;
// Keep decoding until the request is satisfied or the eof flag is set.
227 while (lenRemaining
!= 0 && !state
->eof
);
229 return (len
- lenRemaining
);
// Writing is not supported: the visible body throws a std::logic_error.
// NOTE(review): the exception is thrown as a pointer (throw new), so a
// handler has to catch std::logic_error* and is responsible for deleting it.
233 compress_xz::write (const void *buffer
, size_t len
)
235 throw new std::logic_error("compress_xz::write is not implemented");
// Copy up to `len` upcoming decompressed bytes into `buffer` while keeping
// them in peekbuf, from which read() serves later requests first.
// When peekbuf holds fewer than `len` bytes, more are fetched with read().
239 compress_xz::peek (void *buffer
, size_t len
)
241 /* can only peek 512 bytes */
// Top up peekbuf with the bytes that are still missing.
// NOTE(review): read() itself hands out bytes from peekbuf before it
// decompresses more; confirm that calling it here while peeklen is non-zero
// behaves as intended (the surrounding lines are not visible here).
245 if (len
> this->peeklen
)
247 size_t want
= len
- this->peeklen
;
248 ssize_t got
= read (&(this->peekbuf
[peeklen
]), want
);
250 this->peeklen
+= got
;
254 /* we may have read less than requested. */
// Short result: everything currently held in peekbuf is returned.
255 memcpy (buffer
, this->peekbuf
, this->peeklen
);
256 return this->peeklen
;
// Enough bytes are buffered: copy exactly `len` of them.
260 memcpy (buffer
, this->peekbuf
, len
);
// tell is not supported: a std::logic_error is thrown (as a pointer, via
// throw new) with a fixed message.
269 throw new std::logic_error("compress_xz::tell is not implemented");
// Seek support is limited to rewinding: a request for IO_SEEK_SET with
// offset 0 is forwarded to the underlying stream.
// Other requests appear to reach the throw at the end (the control flow in
// between is not visible in this excerpt - confirm).
273 compress_xz::seek (long where
, io_stream_seek_t whence
)
275 if ((whence
== IO_SEEK_SET
) && (where
== 0))
277 int result
= original
->seek(where
, whence
);
286 throw new std::logic_error("compress_xz::seek is not implemented");
// Error accessor. The body is not visible in this excerpt; presumably it
// reports lasterr, which the other methods set on failure - TODO confirm.
290 compress_xz::error ()
// Forward the modification time to the underlying stream and return its
// result.
296 compress_xz::set_mtime (time_t mtime
)
299 return original
->set_mtime (mtime
);
// Return the modification time reported by the underlying stream.
304 compress_xz::get_mtime ()
307 return original
->get_mtime ();
// Return the mode reported by the underlying stream.
312 compress_xz::get_mode ()
315 return original
->get_mode ();
// Give up ownership of the wrapped stream by clearing owns_original, the
// flag the destructor tests before it acts on original.
320 compress_xz::release_original ()
322 owns_original
= false;
// Tear down the decoder: end the lzma stream if a decoder type was set,
// free both staging buffers, and reset compression_type.
326 compress_xz::destroy ()
// lzma_end is only called when a decoder type had been selected.
330 if ( compression_type
== COMPRESSION_XZ
331 || compression_type
== COMPRESSION_LZMA
)
333 lzma_end(&(state
->stream
));
// Release the buffers and null the pointers so they are not reused.
336 if (state
->out_block
)
338 free (state
->out_block
);
339 state
->out_block
= NULL
;
344 free (state
->in_block
);
345 state
->in_block
= NULL
;
// Mark the object as no longer having an active decoder.
351 compression_type
= COMPRESSION_UNKNOWN
;
// Destructor. The wrapped stream is only acted on when it exists and this
// object still owns it (the action itself is not visible in this excerpt).
355 compress_xz::~compress_xz ()
359 if (original
&& owns_original
)
363 /* ===========================================================================
364 * Check the header of a lzma_stream opened for reading, and initialize
365 * the appropriate decoder (xz or lzma).
367 * the stream has already been created successfully
368 * this method is called only once per stream
369 * OUT assertion - success:
370 * compression_type is set to COMPRESSION_XZ or COMPRESSION_LZMA
371 * state->stream is initialized with the appropriate decoder
372 * lzma: the first 14 bytes of the stream are read (+ whatever
373 * the decoder itself consumes on initialization)
374 * xz: the first 6 bytes of the stream are read (+ whatever the
375 * decoder itself consumes on initialization)
377 * OUT assertion - error:
378 * lasterr is non-zero
// Identify the stream type from its leading bytes and initialise the
// matching liblzma decoder on state->stream.
// The first 6 bytes are tested as an xz header; an lzma test on 14 bytes is
// also present. On failure lasterr is set (errno or EIO for short peeks,
// EINVAL or ENOMEM for decoder set-up problems).
381 compress_xz::init_decoder (void)
383 unsigned char buf
[14];
385 this->compression_type
= COMPRESSION_UNKNOWN
;
387 /* read properties */
// A short peek is an error: use errno when it is set, EIO otherwise.
388 if (this->original
->peek (buf
, 6) != 6)
390 this->lasterr
= (errno
? errno
: EIO
);
394 if (bid_xz ((void *)buf
, 6) > 0)
396 this->compression_type
= COMPRESSION_XZ
;
// NOTE(review): this peeks 8 more bytes into buf + 6. If peek on the
// underlying stream does not consume data, the 8 bytes stored here would
// again come from the start of the stream rather than from offsets 6..13.
// Confirm the semantics of the underlying peek.
400 if (this->original
->peek (buf
+ 6, 8) != 8)
402 this->lasterr
= (errno
? errno
: EIO
);
405 if (bid_lzma ((void *)buf
, 14) > 0)
407 this->compression_type
= COMPRESSION_LZMA
;
// Create the decoder that matches the detected type; both use the same
// memory limit of 1U << 30 bytes.
411 switch (compression_type
)
414 ret
= lzma_stream_decoder (&(state
->stream
),
415 (1U << 30),/* memlimit */
418 case COMPRESSION_LZMA
:
419 ret
= lzma_alone_decoder (&(state
->stream
),
420 (1U << 30));/* memlimit */
423 this->lasterr
= EINVAL
;
// Translate the decoder set-up result into lasterr.
432 this->lasterr
= ENOMEM
;
434 case LZMA_OPTIONS_ERROR
:
435 this->lasterr
= EINVAL
;
438 this->lasterr
= EINVAL
;
// Probe `buffer` (of `len` bytes) with both format bidders: bid_xz first,
// then bid_lzma. How the two scores are combined into the result is only
// partly visible in this excerpt.
444 compress_xz::is_xz_or_lzma (void * buffer
, size_t len
)
447 int bits_checked_lzma
;
449 bits_checked_xz
= bid_xz (buffer
, len
);
453 bits_checked_lzma
= bid_lzma (buffer
, len
);
454 if (bits_checked_lzma
)
461 * Portions of bid_xz() and bid_lzma() have been adapted from the
462 * libarchive archive_read_support_compression_xz.c functions
463 * xz_bidder_bid() and lzma_bidder_bid(), which were released under
464 * the 2-clause (simplified) BSD license, reproduced below.
466 * (modifications for setup.exe) Copyright (c) 2010 Charles Wilson
467 * Copyright (c) 2009 Michihiro NAKAJIMA
468 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
469 * All rights reserved.
471 * Redistribution and use in source and binary forms, with or without
472 * modification, are permitted provided that the following conditions
474 * 1. Redistributions of source code must retain the above copyright
475 * notice, this list of conditions and the following disclaimer.
476 * 2. Redistributions in binary form must reproduce the above copyright
477 * notice, this list of conditions and the following disclaimer in the
478 * documentation and/or other materials provided with the distribution.
480 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
481 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
482 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
483 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
484 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
485 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
486 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
487 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
488 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
489 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Score how likely `buffer` is the start of an xz stream by checking the
// header magic bytes. The score bits_checked is logged and returned; the
// caller in init_decoder treats a value above zero as a match.
492 compress_xz::bid_xz (void * buffer
, size_t len
)
494 const unsigned char *buf
;
497 buf
= (const unsigned char *)buffer
;
500 /* not enough peek'ed data in buf */
505 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
528 LogBabblePrintf ("compress_xz::bid_xz: success: %d\n", bits_checked
);
530 return (bits_checked
);
// Score how likely `buffer` is the start of a legacy lzma stream.
// Three header fields are examined: the properties byte buf[0], the 32-bit
// dictionary size at buf+1 and the 64-bit uncompressed size at buf+5.
// The score bits_checked is logged and returned; the caller in init_decoder
// treats a value above zero as a match.
534 compress_xz::bid_lzma (void * buffer
, size_t len
)
536 const unsigned char *buf
;
538 uint64_t uncompressed_size
;
543 /* not enough peek'ed data in buffer */
546 buf
= (unsigned char *)buffer
;
548 /* First byte of raw LZMA stream is commonly 0x5d.
549 * The first byte is a special number, which consists of
550 * three parameters of LZMA compression, a number of literal
551 * context bits(which is from 0 to 8, default is 3), a number
552 * of literal pos bits(which is from 0 to 4, default is 0),
553 * a number of pos bits(which is from 0 to 4, default is 2).
554 * The first byte is made by
555 * (pos bits * 5 + literal pos bit) * 9 + literal context bit,
556 * and so the default value in this field is
557 * (2 * 5 + 0) * 9 + 3 = 0x5d.
558 * lzma of LZMA SDK has options to change those parameters.
559 * It means a range of this field is from 0 to 224. And lzma of
560 * XZ Utils with option -e records 0x5e in this field. */
561 /* NOTE: If this checking of the first byte increases false
562 * recognition, we should allow only 0x5d and 0x5e for the first
563 * byte of LZMA stream. */
// (4 * 5 + 4) * 9 + 8 is 224, the largest properties byte described above.
565 if (buf
[0] > (4 * 5 + 4) * 9 + 8)
567 /* Most likely value in the first byte of LZMA stream. */
568 if (buf
[0] == 0x5d || buf
[0] == 0x5e)
571 /* Sixth through thirteenth bytes are uncompressed size,
572 * stored in little-endian order. `-1' means uncompressed
573 * size is unknown and lzma of XZ Utils always records `-1'
575 uncompressed_size
= le64dec(buf
+5);
576 if (uncompressed_size
== (uint64_t)(-1))
579 /* Second through fifth bytes are dictionary size, stored in
580 * little-endian order. The minimum dictionary size is
581 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
582 * -d12 and the maximum dictionary size is 1 << 27(128MiB)
583 * which the one uses with option -d27.
584 * NOTE: A comment of LZMA SDK source code says this dictionary
585 * range is from 1 << 12 to 1 << 30. */
586 dicsize
= le32dec(buf
+1);
// Power-of-two dictionary sizes from 4 KiB up to 128 MiB are recognised
// explicitly.
589 case 0x00001000:/* lzma of LZMA SDK option -d12. */
590 case 0x00002000:/* lzma of LZMA SDK option -d13. */
591 case 0x00004000:/* lzma of LZMA SDK option -d14. */
592 case 0x00008000:/* lzma of LZMA SDK option -d15. */
593 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
594 * lzma of LZMA SDK option -d16. */
595 case 0x00020000:/* lzma of LZMA SDK option -d17. */
596 case 0x00040000:/* lzma of LZMA SDK option -d18. */
597 case 0x00080000:/* lzma of XZ Utils option -2.
598 * lzma of LZMA SDK option -d19. */
599 case 0x00100000:/* lzma of XZ Utils option -3.
600 * lzma of LZMA SDK option -d20. */
601 case 0x00200000:/* lzma of XZ Utils option -4.
602 * lzma of LZMA SDK option -d21. */
603 case 0x00400000:/* lzma of XZ Utils option -5.
604 * lzma of LZMA SDK option -d22. */
605 case 0x00800000:/* lzma of XZ Utils option -6.
606 * lzma of LZMA SDK option -d23. */
607 case 0x01000000:/* lzma of XZ Utils option -7.
608 * lzma of LZMA SDK option -d24. */
609 case 0x02000000:/* lzma of XZ Utils option -8.
610 * lzma of LZMA SDK option -d25. */
611 case 0x04000000:/* lzma of XZ Utils option -9.
612 * lzma of LZMA SDK option -d26. */
613 case 0x08000000:/* lzma of LZMA SDK option -d27. */
617 /* If a memory usage for encoding was not enough on
618 * the platform where LZMA stream was made, lzma of
619 * XZ Utils automatically decreased the dictionary
620 * size to enough memory for encoding by 1Mi bytes
// Accept whole-MiB sizes between 3 MiB and 63 MiB, but only when the
// score so far is exactly 8 + 64.
622 if (dicsize
<= 0x03F00000 && dicsize
>= 0x00300000
623 && (dicsize
& ((1 << 20)-1)) == 0
624 && bits_checked
== 8 + 64)
629 /* Otherwise dictionary size is unlikely. But it is
630 * possible that someone makes lzma stream with
631 * liblzma/LZMA SDK in one's dictionary size. */
635 /* TODO: The above test is still very weak. It would be
636 * good to do better. */
638 LogBabblePrintf ("compress_xz::bid_lzma: success: %d\n", bits_checked
);
640 return (bits_checked
);