2 * Copyright (c) 2008, Charles Wilson
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * A copy of the GNU General Public License can be found at
12 * Written by Charles Wilson <cygwin@cygwin.com>
14 * Portions of bid_xz() and bid_lzma() adapted from the libarchive
15 * archive_read_support_compression_xz.c functions xz_bidder_bid()
16 * and lzma_bidder_bid(), which are under a BSD license (reproduced
20 #include "compress_xz.h"
21 #include "LogSingleton.h"
/*
 * Decode a 32-bit little-endian unsigned integer from the four bytes at pp.
 * The bytes are read one at a time, so pp does not need to be aligned.
 */
static inline uint32_t
le32dec(const void *pp)
{
	unsigned char const *p = (unsigned char const *)pp;

	/* Widen each byte to uint32_t before shifting.  An unsigned char is
	 * promoted to (signed) int, so shifting a byte >= 0x80 left by 24
	 * would otherwise overflow into the sign bit. */
	return (((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16)
	    | ((uint32_t)p[1] << 8) | (uint32_t)p[0]);
}
/*
 * Decode a 64-bit little-endian unsigned integer from the eight bytes at pp.
 * The bytes are read one at a time, so pp does not need to be aligned.
 */
static inline uint64_t
le64dec(const void *pp)
{
	const unsigned char *bytes = (const unsigned char *)pp;
	uint64_t value = 0;

	/* Fold from the most significant byte (offset 7) down to offset 0. */
	for (int idx = 7; idx >= 0; --idx)
		value = (value << 8) | bytes[idx];

	return value;
}
44 * Predicate: the stream is open for read.
/* Construct a decompressing stream on top of `parent`.  compression_type
 * starts out as COMPRESSION_UNKNOWN, and a null parent or one that already
 * reports an error is tested for before anything else is done with it. */
46 compress_xz::compress_xz (io_stream
* parent
)
52 compression_type (COMPRESSION_UNKNOWN
)
55 if (!parent
|| parent
->error())
/* Allocate the decoder state together with its input and output staging
 * buffers, then prime the lzma stream: no compressed input pending, and the
 * whole of out_block available for decoded output. */
67 compress_xz::create ()
69 unsigned char * out_block
= NULL
;
70 unsigned char * in_block
= NULL
;
/* All three allocations are attempted first and then checked together.
 * NOTE(review): calloc is called as (size, count) rather than
 * (count, size); the number of zeroed bytes is the same either way. */
72 state
= (struct private_data
*)calloc(sizeof(*state
), 1);
73 out_block
= (unsigned char *)malloc(out_block_size
);
74 in_block
= (unsigned char *)malloc(in_block_size
);
75 if (state
== NULL
|| out_block
== NULL
|| in_block
== NULL
)
/* Start from an all-zero lzma stream before recording the buffers. */
84 memset(&(state
->stream
), 0x00, sizeof(state
->stream
));
85 state
->out_block_size
= out_block_size
;
86 state
->out_block
= out_block
;
87 state
->in_block_size
= in_block_size
;
88 state
->in_block
= in_block
;
/* out_p is the read cursor into out_block; it starts at the beginning. */
89 state
->out_p
= state
->out_block
;
90 state
->stream
.avail_in
= 0;
91 state
->stream
.next_out
= state
->out_block
;
92 state
->stream
.avail_out
= state
->out_block_size
;
/* Read up to `len` decompressed bytes into `buffer`.
 *
 * Bytes are served from three places, in this order:
 *   1. peekbuf, holding bytes previously fetched by peek();
 *   2. bytes already decoded into out_block but not yet handed out
 *      (the range out_p .. out_block + out_pos);
 *   3. fresh output produced by lzma_code() from compressed input that is
 *      pulled from the underlying stream through in_block.
 * The first two cases copy what they have and recurse for the remainder.
 * The decode loop returns len - lenRemaining.  lzma error codes are logged
 * and mapped onto lasterr (ENOMEM or EINVAL). */
96 compress_xz::read (void *buffer
, size_t len
)
98 if ( compression_type
!= COMPRESSION_XZ
99 && compression_type
!= COMPRESSION_LZMA
)
104 /* there is no recovery from a busted stream */
114 /* peekbuf is layered on top of existing buffering code */
117 ssize_t tmplen
= std::min (this->peeklen
, len
);
118 this->peeklen
-= tmplen
;
119 memcpy (buffer
, this->peekbuf
, tmplen
);
/* Slide the unread remainder of peekbuf down to its start. */
120 memmove (this->peekbuf
, this->peekbuf
+ tmplen
, sizeof(this->peekbuf
) - tmplen
);
121 ssize_t tmpread
= read (&((char *) buffer
)[tmplen
], len
- tmplen
);
123 return tmpread
+ tmplen
;
128 if (state
->out_p
< state
->out_block
+ state
->out_pos
)
129 /* out_p - out_block < out_pos, but avoid sign/unsigned warning */
131 ssize_t tmplen
= std::min ((size_t)(state
->out_block
+ state
->out_pos
- state
->out_p
), len
);
132 memcpy (buffer
, state
->out_p
, tmplen
);
133 state
->out_p
+= tmplen
;
134 ssize_t tmpread
= read (&((char *) buffer
)[tmplen
], len
- tmplen
);
136 return tmpread
+ tmplen
;
141 size_t lenRemaining
= len
;
142 unsigned char * bufp
= (unsigned char *)buffer
;
144 size_t avail_out
= 0;
145 size_t decompressed
= 0;
147 /* if we made it here, any existing uncompressed data in out_block
148 * has been consumed, so reset out_p and out_pos
150 state
->out_p
= state
->out_block
;
154 if (state
->in_pos
== state
->in_size
)
156 /* no compressed data ready; read some more */
/* NOTE(review): the result of original->read() is cast to size_t before
 * it is stored.  If in_size is an unsigned type, the `< 0` test below can
 * never be true and a failed read would look like a huge in_size -
 * confirm the declared type of in_size. */
157 state
->in_size
= (size_t) this->original
->read(state
->in_block
, state
->in_block_size
);
158 /* We don't care for error vs EOF */
159 if (state
->in_size
< 0)
164 avail_in
= state
->in_size
- state
->in_pos
; /* will be 0 if EOF */
165 avail_out
= state
->out_block_size
- state
->out_pos
;
/* Point the lzma stream at the unused tail of each staging buffer. */
167 state
->stream
.next_out
= state
->out_block
+ state
->out_pos
;
168 state
->stream
.avail_out
= avail_out
;
169 state
->stream
.next_in
= state
->in_block
+ state
->in_pos
;
170 state
->stream
.avail_in
= avail_in
;
/* With no input left the decoder is asked to finish (LZMA_FINISH);
 * otherwise it keeps running (LZMA_RUN). */
172 lzma_ret res
= lzma_code (&(state
->stream
),
173 (state
->stream
.avail_in
== 0) ? LZMA_FINISH
: LZMA_RUN
);
/* Work out how much lzma_code() consumed and produced from the change in
 * the stream's avail_in / avail_out counters. */
175 consumed
= avail_in
- state
->stream
.avail_in
;
176 decompressed
= avail_out
- state
->stream
.avail_out
;
178 state
->in_pos
+= consumed
;
179 state
->out_pos
+= decompressed
;
/* Hand over as much of the new output as the caller still wants; anything
 * beyond that stays in out_block, past out_p. */
181 ssize_t tmplen
= std::min (decompressed
, lenRemaining
);
182 memcpy (bufp
, state
->out_p
, tmplen
);
183 state
->out_p
+= tmplen
;
185 lenRemaining
-= tmplen
;
186 state
->total_out
+= decompressed
;
187 state
->total_in
+= consumed
;
191 case LZMA_STREAM_END
: /* Found end of stream. */
194 case LZMA_OK
: /* Decompressor made some progress. */
197 LogPlainPrintf ("Lzma library error: Cannot allocate memory\n");
198 this->lasterr
= ENOMEM
;
200 case LZMA_MEMLIMIT_ERROR
:
201 LogPlainPrintf ("Lzma library error: Out of memory\n");
202 this->lasterr
= ENOMEM
;
204 case LZMA_FORMAT_ERROR
:
205 LogPlainPrintf ("Lzma library error: format not recognized\n");
206 this->lasterr
= EINVAL
;
208 case LZMA_OPTIONS_ERROR
:
209 LogPlainPrintf ("Lzma library error: Invalid options\n");
210 this->lasterr
= EINVAL
;
212 case LZMA_DATA_ERROR
:
213 LogPlainPrintf ("Lzma library error: Corrupted input data\n");
214 this->lasterr
= EINVAL
;
217 LogPlainPrintf ("Lzma library error: No progress is possible\n");
218 this->lasterr
= EINVAL
;
220 case LZMA_PROG_ERROR
:
221 LogPlainPrintf ("Lzma library error: Internal error\n");
222 this->lasterr
= EINVAL
;
225 LogPlainPrintf ("Lzma decompression failed: Unknown error %d\n", res
);
226 this->lasterr
= EINVAL
;
/* Keep decoding until the request is satisfied or the stream has ended. */
230 while (lenRemaining
!= 0 && !state
->eof
);
232 return (len
- lenRemaining
);
/* Writing through this stream is not supported; the call throws.
 * NOTE(review): `throw new` throws a std::logic_error pointer, so only
 * handlers that catch std::logic_error * (or a base pointer) will match. */
236 compress_xz::write (const void *buffer
, size_t len
)
238 throw new std::logic_error("compress_xz::write is not implemented");
/* Copy up to `len` upcoming decompressed bytes into `buffer` and keep them
 * in peekbuf for later.  When peekbuf holds fewer than `len` bytes, the
 * shortfall is requested from read() and appended; if that still leaves
 * less than `len`, the peeklen bytes actually available are copied and
 * peeklen is returned. */
242 compress_xz::peek (void *buffer
, size_t len
)
244 /* can only peek 512 bytes */
248 if (len
> this->peeklen
)
250 size_t want
= len
- this->peeklen
;
/* NOTE(review): read() itself serves bytes out of peekbuf first.  If it
 * does so whenever peeklen is non-zero, calling it here with a non-empty
 * peekbuf would re-serve already-peeked bytes instead of fetching new
 * ones - confirm against the guard around read()'s peekbuf branch. */
251 ssize_t got
= read (&(this->peekbuf
[peeklen
]), want
);
253 this->peeklen
+= got
;
257 /* we may have read less than requested. */
258 memcpy (buffer
, this->peekbuf
, this->peeklen
);
259 return this->peeklen
;
263 memcpy (buffer
, this->peekbuf
, len
);
/* Reporting the stream position is not supported; this throws a
 * heap-allocated std::logic_error (a pointer, because of `throw new`). */
272 throw new std::logic_error("compress_xz::tell is not implemented");
/* Seek within the decompressed stream.  The only request that is handled
 * is a rewind to the very start (IO_SEEK_SET with offset 0), which is
 * forwarded to the underlying stream.  The throw at the end presumably
 * covers every other request - confirm against the full control flow.
 * NOTE(review): `throw new` throws a std::logic_error pointer. */
276 compress_xz::seek (off_t where
, io_stream_seek_t whence
)
278 if ((whence
== IO_SEEK_SET
) && (where
== 0))
280 off_t result
= original
->seek(where
, whence
);
289 throw new std::logic_error("compress_xz::seek is not implemented");
293 compress_xz::error ()
/* Set the modification time by delegating to the underlying stream and
 * returning whatever its set_mtime() returns. */
299 compress_xz::set_mtime (time_t mtime
)
302 return original
->set_mtime (mtime
);
/* Report the modification time as given by the underlying stream. */
307 compress_xz::get_mtime ()
310 return original
->get_mtime ();
/* Report the mode as given by the underlying stream. */
315 compress_xz::get_mode ()
318 return original
->get_mode ();
/* Give up ownership of the underlying stream: once owns_original is
 * cleared, the destructor's `original && owns_original` test no longer
 * passes for it. */
323 compress_xz::release_original ()
325 owns_original
= false;
/* Tear down the decoder: end the lzma stream if a decoder was selected,
 * free both staging buffers, and mark the stream as having no known
 * compression type again. */
329 compress_xz::destroy ()
/* lzma_end() is only called when a decoder type (xz or lzma) was set. */
333 if ( compression_type
== COMPRESSION_XZ
334 || compression_type
== COMPRESSION_LZMA
)
336 lzma_end(&(state
->stream
));
339 if (state
->out_block
)
341 free (state
->out_block
);
/* Pointers are reset to NULL after freeing. */
342 state
->out_block
= NULL
;
347 free (state
->in_block
);
348 state
->in_block
= NULL
;
354 compression_type
= COMPRESSION_UNKNOWN
;
/* Destructor.  The underlying stream is only acted on when it exists and
 * this object still owns it (see release_original()). */
358 compress_xz::~compress_xz ()
362 if (original
&& owns_original
)
366 /* ===========================================================================
367 * Check the header of a lzma_stream opened for reading, and initialize
368 * the appropriate decoder (xz or lzma).
370 * the stream has already been created successfully
371 * this method is called only once per stream
372 * OUT assertion - success:
373 * compression_type is set to COMPRESSION_XZ or COMPRESSION_LZMA
374 * state->stream is initialized with the appropriate decoder
375 * lzma: the first 14 bytes of the stream are read (+ whatever
376 * the decoder itself consumes on initialization)
377 * xz: the first 6 bytes of the stream are read (+ whatever the
378 * decoder itself consumes on initialization)
380 * OUT assertion - error:
381 * lasterr is non-zero
/* Sniff the start of the underlying stream and set up the matching liblzma
 * decoder.  The first 6 bytes are peeked and offered to bid_xz(); when the
 * lzma path is taken, 8 more bytes are peeked so that bid_lzma() can look
 * at a 14-byte header.  compression_type records the outcome, and failures
 * are reported through lasterr (errno or EIO for short peeks, EINVAL or
 * ENOMEM for decoder set-up problems). */
384 compress_xz::init_decoder (void)
386 unsigned char buf
[14];
388 this->compression_type
= COMPRESSION_UNKNOWN
;
390 /* read properties */
391 if (this->original
->peek (buf
, 6) != 6)
393 this->lasterr
= (errno
? errno
: EIO
);
397 if (bid_xz ((void *)buf
, 6) > 0)
399 this->compression_type
= COMPRESSION_XZ
;
/* NOTE(review): this second peek stores 8 bytes at buf + 6.  If
 * io_stream::peek() does not consume input, those are the first 8 bytes
 * of the stream again rather than bytes 6..13 - confirm peek semantics. */
403 if (this->original
->peek (buf
+ 6, 8) != 8)
405 this->lasterr
= (errno
? errno
: EIO
);
408 if (bid_lzma ((void *)buf
, 14) > 0)
410 this->compression_type
= COMPRESSION_LZMA
;
/* Both decoders are created with a memory limit of 1U << 30 (1 GiB). */
414 switch (compression_type
)
417 ret
= lzma_stream_decoder (&(state
->stream
),
418 (1U << 30),/* memlimit */
421 case COMPRESSION_LZMA
:
422 ret
= lzma_alone_decoder (&(state
->stream
),
423 (1U << 30));/* memlimit */
426 this->lasterr
= EINVAL
;
435 this->lasterr
= ENOMEM
;
437 case LZMA_OPTIONS_ERROR
:
438 this->lasterr
= EINVAL
;
441 this->lasterr
= EINVAL
;
/* Test whether `buffer` (`len` bytes) looks like the start of an xz or
 * lzma stream by running the xz bidder first and then the lzma bidder on
 * the same bytes. */
447 compress_xz::is_xz_or_lzma (void * buffer
, size_t len
)
450 int bits_checked_lzma
;
452 bits_checked_xz
= bid_xz (buffer
, len
);
456 bits_checked_lzma
= bid_lzma (buffer
, len
);
457 if (bits_checked_lzma
)
464 * Portions of bid_xz() and bid_lzma() have been adapted from the
465 * libarchive archive_read_support_compression_xz.c functions
466 * xz_bidder_bid() and lzma_bidder_bid(), which were released under
467 * the 2-clause (simplified) BSD license, reproduced below.
469 * (modifications for setup.exe) Copyright (c) 2010 Charles Wilson
470 * Copyright (c) 2009 Michihiro NAKAJIMA
471 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
472 * All rights reserved.
474 * Redistribution and use in source and binary forms, with or without
475 * modification, are permitted provided that the following conditions
477 * 1. Redistributions of source code must retain the above copyright
478 * notice, this list of conditions and the following disclaimer.
479 * 2. Redistributions in binary form must reproduce the above copyright
480 * notice, this list of conditions and the following disclaimer in the
481 * documentation and/or other materials provided with the distribution.
483 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
484 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
485 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
486 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
487 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
488 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
489 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
490 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
491 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
492 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Score how strongly `buffer` looks like the start of an .xz stream by
 * checking the header magic bytes (FD 37 7A 58 5A 00).  On the success
 * path the number of bits checked is logged and returned; init_decoder()
 * treats any result greater than zero as a match. */
495 compress_xz::bid_xz (void * buffer
, size_t len
)
497 const unsigned char *buf
;
500 buf
= (const unsigned char *)buffer
;
503 /* not enough peek'ed data in buf */
508 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
531 LogBabblePrintf ("compress_xz::bid_xz: success: %d\n", bits_checked
);
533 return (bits_checked
);
/* Score how strongly `buffer` looks like the start of a legacy .lzma
 * stream.  Three header fields are examined: the properties byte at
 * offset 0, the little-endian dictionary size at offsets 1..4 and the
 * little-endian uncompressed size at offsets 5..12.  On the success path
 * the accumulated bits_checked is logged and returned; init_decoder()
 * treats any result greater than zero as a match. */
537 compress_xz::bid_lzma (void * buffer
, size_t len
)
539 const unsigned char *buf
;
541 uint64_t uncompressed_size
;
546 /* not enough peek'ed data in buffer */
549 buf
= (unsigned char *)buffer
;
551 /* First byte of raw LZMA stream is commonly 0x5d.
552 * The first byte is a special number, which consists of
553 * three parameters of LZMA compression, a number of literal
554 * context bits(which is from 0 to 8, default is 3), a number
555 * of literal pos bits(which is from 0 to 4, default is 0),
556 * a number of pos bits(which is from 0 to 4, default is 2).
557 * The first byte is made by
558 * (pos bits * 5 + literal pos bits) * 9 + literal context bits,
559 * and so the default value in this field is
560 * (2 * 5 + 0) * 9 + 3 = 0x5d.
561 * lzma of LZMA SDK has options to change those parameters.
562 * It means a range of this field is from 0 to 224. And lzma of
563 * XZ Utils with option -e records 0x5e in this field. */
564 /* NOTE: If this checking of the first byte increases false
565 * recognition, we should allow only 0x5d and 0x5e for the first
566 * byte of LZMA stream. */
/* (4 * 5 + 4) * 9 + 8 == 224 is the largest value the formula above can
 * produce. */
568 if (buf
[0] > (4 * 5 + 4) * 9 + 8)
570 /* Most likely value in the first byte of LZMA stream. */
571 if (buf
[0] == 0x5d || buf
[0] == 0x5e)
574 /* Sixth through thirteenth bytes are uncompressed size,
575 * stored in little-endian order. `-1' means uncompressed
576 * size is unknown and lzma of XZ Utils always records `-1'
578 uncompressed_size
= le64dec(buf
+5);
579 if (uncompressed_size
== (uint64_t)(-1))
582 /* Second through fifth bytes are dictionary size, stored in
583 * little-endian order. The minimum dictionary size is
584 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
585 * -d12 and the maximum dictionary size is 1 << 27(128MiB)
586 * which the one uses with option -d27.
587 * NOTE: A comment of LZMA SDK source code says this dictionary
588 * range is from 1 << 12 to 1 << 30. */
589 dicsize
= le32dec(buf
+1);
592 case 0x00001000:/* lzma of LZMA SDK option -d12. */
593 case 0x00002000:/* lzma of LZMA SDK option -d13. */
594 case 0x00004000:/* lzma of LZMA SDK option -d14. */
595 case 0x00008000:/* lzma of LZMA SDK option -d15. */
596 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
597 * lzma of LZMA SDK option -d16. */
598 case 0x00020000:/* lzma of LZMA SDK option -d17. */
599 case 0x00040000:/* lzma of LZMA SDK option -d18. */
600 case 0x00080000:/* lzma of XZ Utils option -2.
601 * lzma of LZMA SDK option -d19. */
602 case 0x00100000:/* lzma of XZ Utils option -3.
603 * lzma of LZMA SDK option -d20. */
604 case 0x00200000:/* lzma of XZ Utils option -4.
605 * lzma of LZMA SDK option -d21. */
606 case 0x00400000:/* lzma of XZ Utils option -5.
607 * lzma of LZMA SDK option -d22. */
608 case 0x00800000:/* lzma of XZ Utils option -6.
609 * lzma of LZMA SDK option -d23. */
610 case 0x01000000:/* lzma of XZ Utils option -7.
611 * lzma of LZMA SDK option -d24. */
612 case 0x02000000:/* lzma of XZ Utils option -8.
613 * lzma of LZMA SDK option -d25. */
614 case 0x04000000:/* lzma of XZ Utils option -9.
615 * lzma of LZMA SDK option -d26. */
616 case 0x08000000:/* lzma of LZMA SDK option -d27. */
620 /* If a memory usage for encoding was not enough on
621 * the platform where LZMA stream was made, lzma of
622 * XZ Utils automatically decreased the dictionary
623 * size to enough memory for encoding by 1Mi bytes
/* Accept a whole number of MiB between 3 MiB and 63 MiB, but only when
 * bits_checked is exactly 8 + 64 at this point. */
625 if (dicsize
<= 0x03F00000 && dicsize
>= 0x00300000
626 && (dicsize
& ((1 << 20)-1)) == 0
627 && bits_checked
== 8 + 64)
632 /* Otherwise dictionary size is unlikely. But it is
633 * possible that someone makes lzma stream with
634 * liblzma/LZMA SDK in one's dictionary size. */
638 /* TODO: The above test is still very weak. It would be
639 * good to do better. */
641 LogBabblePrintf ("compress_xz::bid_lzma: success: %d\n", bits_checked
);
643 return (bits_checked
);