Show overall progress for checking packages in package cache
[cygwin-setup.git] / compress_xz.cc
blobd3f4886c3cd91b8d65488ff3f450dade13f96b8b
/*
 * Copyright (c) 2008, Charles Wilson
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * A copy of the GNU General Public License can be found at
 * http://www.gnu.org/
 *
 * Written by Charles Wilson <cygwin@cygwin.com>
 *
 * Portions of bid_xz() and bid_lzma() adapted from the libarchive
 * archive_read_support_compression_xz.c functions xz_bidder_bid()
 * and lzma_bidder_bid(), which are under a BSD license (reproduced
 * below).
 */
#include "compress_xz.h"
#include "LogSingleton.h"

#include <algorithm>
#include <stdexcept>

#include <errno.h>
#include <memory.h>
#include <malloc.h>
/*
 * Decode a 32-bit unsigned integer stored in little-endian byte order.
 *
 * pp points at (at least) four readable bytes; no alignment is required
 * because the value is assembled one byte at a time.
 */
static inline uint32_t
le32dec(const void *pp)
{
  unsigned char const *p = (unsigned char const *)pp;

  /* Widen every byte to uint32_t before shifting.  A bare `p[3] << 24'
   * is evaluated in (signed) int after integral promotion, so a top
   * byte >= 0x80 would shift a one into the sign bit. */
  return (((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16)
          | ((uint32_t)p[1] << 8) | (uint32_t)p[0]);
}
/*
 * Decode a 64-bit unsigned integer stored in little-endian byte order.
 *
 * pp points at (at least) eight readable bytes.  The value is built
 * from the most significant byte (offset 7) down to the least
 * significant one (offset 0).
 */
static inline uint64_t
le64dec(const void *pp)
{
  const unsigned char *bytes = (const unsigned char *)pp;
  uint64_t value = 0;

  for (int idx = 7; idx >= 0; --idx)
    value = (value << 8) | bytes[idx];

  return value;
}
44 * Predicate: the stream is open for read.
46 compress_xz::compress_xz (io_stream * parent)
48 original(NULL),
49 owns_original(true),
50 peeklen(0),
51 lasterr(0),
52 compression_type (COMPRESSION_UNKNOWN)
54 /* read only */
55 if (!parent || parent->error())
57 lasterr = EBADF;
58 return;
60 original = parent;
62 create ();
63 init_decoder ();
66 void
67 compress_xz::create ()
69 unsigned char * out_block = NULL;
70 unsigned char * in_block = NULL;
72 state = (struct private_data *)calloc(sizeof(*state), 1);
73 out_block = (unsigned char *)malloc(out_block_size);
74 in_block = (unsigned char *)malloc(in_block_size);
75 if (state == NULL || out_block == NULL || in_block == NULL)
77 free(out_block);
78 free(in_block);
79 free(state);
80 lasterr = ENOMEM;
81 return;
84 memset(&(state->stream), 0x00, sizeof(state->stream));
85 state->out_block_size = out_block_size;
86 state->out_block = out_block;
87 state->in_block_size = in_block_size;
88 state->in_block = in_block;
89 state->out_p = state->out_block;
90 state->stream.avail_in = 0;
91 state->stream.next_out = state->out_block;
92 state->stream.avail_out = state->out_block_size;
95 ssize_t
96 compress_xz::read (void *buffer, size_t len)
98 if ( compression_type != COMPRESSION_XZ
99 && compression_type != COMPRESSION_LZMA)
101 return -1;
104 /* there is no recovery from a busted stream */
105 if (this->lasterr)
107 return -1;
109 if (len == 0)
111 return 0;
114 /* peekbuf is layered on top of existing buffering code */
115 if (this->peeklen)
117 ssize_t tmplen = std::min (this->peeklen, len);
118 this->peeklen -= tmplen;
119 memcpy (buffer, this->peekbuf, tmplen);
120 memmove (this->peekbuf, this->peekbuf + tmplen, sizeof(this->peekbuf) - tmplen);
121 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
122 if (tmpread >= 0)
123 return tmpread + tmplen;
124 else
125 return tmpread;
128 if (state->out_p < state->out_block + state->out_pos)
129 /* out_p - out_block < out_pos, but avoid sign/unsigned warning */
131 ssize_t tmplen = std::min ((size_t)(state->out_block + state->out_pos - state->out_p), len);
132 memcpy (buffer, state->out_p, tmplen);
133 state->out_p += tmplen;
134 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
135 if (tmpread >= 0)
136 return tmpread + tmplen;
137 else
138 return tmpread;
141 size_t lenRemaining = len;
142 unsigned char * bufp = (unsigned char *)buffer;
143 size_t avail_in = 0;
144 size_t avail_out = 0;
145 size_t decompressed = 0;
146 size_t consumed = 0;
147 /* if we made it here, any existing uncompressed data in out_block
148 * has been consumed, so reset out_p and out_pos
150 state->out_p = state->out_block;
151 state->out_pos = 0;
154 if (state->in_pos == state->in_size)
156 /* no compressed data ready; read some more */
157 state->in_size = (size_t) this->original->read(state->in_block, state->in_block_size);
158 state->in_pos = 0;
161 avail_in = state->in_size - state->in_pos; /* will be 0 if EOF */
162 avail_out = state->out_block_size - state->out_pos;
164 state->stream.next_out = state->out_block + state->out_pos;
165 state->stream.avail_out = avail_out;
166 state->stream.next_in = state->in_block + state->in_pos;
167 state->stream.avail_in = avail_in;
169 lzma_ret res = lzma_code (&(state->stream),
170 (state->stream.avail_in == 0) ? LZMA_FINISH : LZMA_RUN);
172 consumed = avail_in - state->stream.avail_in;
173 decompressed = avail_out - state->stream.avail_out;
175 state->in_pos += consumed;
176 state->out_pos += decompressed;
178 ssize_t tmplen = std::min (decompressed, lenRemaining);
179 memcpy (bufp, state->out_p, tmplen);
180 state->out_p += tmplen;
181 bufp += tmplen;
182 lenRemaining -= tmplen;
183 state->total_out += decompressed;
184 state->total_in += consumed;
186 switch (res)
188 case LZMA_STREAM_END: /* Found end of stream. */
189 state->eof = 1;
190 /* FALL THROUGH */
191 case LZMA_OK: /* Decompressor made some progress. */
192 break;
193 case LZMA_MEM_ERROR:
194 LogPlainPrintf ("Lzma library error: Cannot allocate memory\n");
195 this->lasterr = ENOMEM;
196 return -1;
197 case LZMA_MEMLIMIT_ERROR:
198 LogPlainPrintf ("Lzma library error: Out of memory\n");
199 this->lasterr = ENOMEM;
200 return -1;
201 case LZMA_FORMAT_ERROR:
202 LogPlainPrintf ("Lzma library error: format not recognized\n");
203 this->lasterr = EINVAL;
204 return -1;
205 case LZMA_OPTIONS_ERROR:
206 LogPlainPrintf ("Lzma library error: Invalid options\n");
207 this->lasterr = EINVAL;
208 return -1;
209 case LZMA_DATA_ERROR:
210 LogPlainPrintf ("Lzma library error: Corrupted input data\n");
211 this->lasterr = EINVAL;
212 return -1;
213 case LZMA_BUF_ERROR:
214 LogPlainPrintf ("Lzma library error: No progress is possible\n");
215 this->lasterr = EINVAL;
216 return -1;
217 case LZMA_PROG_ERROR:
218 LogPlainPrintf ("Lzma library error: Internal error\n");
219 this->lasterr = EINVAL;
220 return -1;
221 default:
222 LogPlainPrintf ("Lzma decompression failed: Unknown error %d\n", res);
223 this->lasterr = EINVAL;
224 return -1;
227 while (lenRemaining != 0 && !state->eof);
229 return (len - lenRemaining);
232 ssize_t
233 compress_xz::write (const void *buffer, size_t len)
235 throw new std::logic_error("compress_xz::write is not implemented");
238 ssize_t
239 compress_xz::peek (void *buffer, size_t len)
241 /* can only peek 512 bytes */
242 if (len > 512)
243 return ENOMEM;
245 if (len > this->peeklen)
247 size_t want = len - this->peeklen;
248 ssize_t got = read (&(this->peekbuf[peeklen]), want);
249 if (got >= 0)
250 this->peeklen += got;
251 else
252 /* error */
253 return got;
254 /* we may have read less than requested. */
255 memcpy (buffer, this->peekbuf, this->peeklen);
256 return this->peeklen;
258 else
260 memcpy (buffer, this->peekbuf, len);
261 return len;
263 return 0;
266 long
267 compress_xz::tell ()
269 throw new std::logic_error("compress_xz::tell is not implemented");
273 compress_xz::seek (long where, io_stream_seek_t whence)
275 if ((whence == IO_SEEK_SET) && (where == 0))
277 int result = original->seek(where, whence);
278 destroy ();
279 peeklen = 0;
280 lasterr = 0;
281 create ();
282 init_decoder ();
283 return result;
286 throw new std::logic_error("compress_xz::seek is not implemented");
290 compress_xz::error ()
292 return lasterr;
296 compress_xz::set_mtime (time_t mtime)
298 if (original)
299 return original->set_mtime (mtime);
300 return 1;
303 time_t
304 compress_xz::get_mtime ()
306 if (original)
307 return original->get_mtime ();
308 return 0;
311 mode_t
312 compress_xz::get_mode ()
314 if (original)
315 return original->get_mode ();
316 return 0;
319 void
320 compress_xz::release_original ()
322 owns_original = false;
325 void
326 compress_xz::destroy ()
328 if (state)
330 if ( compression_type == COMPRESSION_XZ
331 || compression_type == COMPRESSION_LZMA)
333 lzma_end(&(state->stream));
336 if (state->out_block)
338 free (state->out_block);
339 state->out_block = NULL;
342 if (state->in_block)
344 free (state->in_block);
345 state->in_block = NULL;
348 free(state);
349 state = NULL;
351 compression_type = COMPRESSION_UNKNOWN;
355 compress_xz::~compress_xz ()
357 destroy ();
359 if (original && owns_original)
360 delete original;
363 /* ===========================================================================
364 * Check the header of a lzma_stream opened for reading, and initialize
365 * the appropriate decoder (xz or lzma).
366 * IN assertion:
367 * the stream has already been created sucessfully
368 * this method is called only once per stream
369 * OUT assertion - success:
370 * compression_type is set to COMPRESSION_XZ or COMPRESSION_LZMA
371 * state->stream is initialized with the appropriate decoder
372 * lzma: the first 14 bytes of the stream are read (+ whatever
373 * the decoder itself consumes on initialization)
374 * xz: the first 6 bytes of the stram are read (+ whatever the
375 * decoder itself consumes on initialization)
376 * last_error is zero
377 * OUT assertion - error:
378 * last_error is non-zero
380 void
381 compress_xz::init_decoder (void)
383 unsigned char buf[14];
384 int ret;
385 this->compression_type = COMPRESSION_UNKNOWN;
387 /* read properties */
388 if (this->original->peek (buf, 6) != 6)
390 this->lasterr = (errno ? errno : EIO);
391 return;
394 if (bid_xz ((void *)buf, 6) > 0)
396 this->compression_type = COMPRESSION_XZ;
398 else
400 if (this->original->peek (buf + 6, 8) != 8)
402 this->lasterr = (errno ? errno : EIO);
403 return;
405 if (bid_lzma ((void *)buf, 14) > 0)
407 this->compression_type = COMPRESSION_LZMA;
411 switch (compression_type)
413 case COMPRESSION_XZ:
414 ret = lzma_stream_decoder (&(state->stream),
415 (1U << 30),/* memlimit */
416 LZMA_CONCATENATED);
417 break;
418 case COMPRESSION_LZMA:
419 ret = lzma_alone_decoder (&(state->stream),
420 (1U << 30));/* memlimit */
421 break;
422 default:
423 this->lasterr = EINVAL;
424 return;
427 switch (ret)
429 case LZMA_OK:
430 break;
431 case LZMA_MEM_ERROR:
432 this->lasterr = ENOMEM;
433 break;
434 case LZMA_OPTIONS_ERROR:
435 this->lasterr = EINVAL;
436 break;
437 default:
438 this->lasterr = EINVAL;
439 break;
443 bool
444 compress_xz::is_xz_or_lzma (void * buffer, size_t len)
446 int bits_checked_xz;
447 int bits_checked_lzma;
449 bits_checked_xz = bid_xz (buffer, len);
450 if (bits_checked_xz)
451 return true;
453 bits_checked_lzma = bid_lzma (buffer, len);
454 if (bits_checked_lzma)
455 return true;
457 return false;
/*
 * Portions of bid_xz() and bid_lzma() have been adapted from the
 * libarchive archive_read_support_compression_xz.c functions
 * xz_bidder_bid() and lzma_bidder_bid(), which were released under
 * the 2-clause (simplified) BSD license, reproduced below.
 *
 * (modifications for setup.exe) Copyright (c) 2010 Charles Wilson
 * Copyright (c) 2009 Michihiro NAKAJIMA
 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
492 compress_xz::bid_xz (void * buffer, size_t len)
494 const unsigned char *buf;
495 int bits_checked;
497 buf = (const unsigned char *)buffer;
498 if (len < 6)
500 /* not enough peek'ed data in buf */
501 return 0;
505 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
507 bits_checked = 0;
508 if (buf[0] != 0xFD)
509 return 0;
510 bits_checked += 8;
511 if (buf[1] != 0x37)
512 return 0;
513 bits_checked += 8;
514 if (buf[2] != 0x7A)
515 return 0;
516 bits_checked += 8;
517 if (buf[3] != 0x58)
518 return 0;
519 bits_checked += 8;
520 if (buf[4] != 0x5A)
521 return 0;
522 bits_checked += 8;
523 if (buf[5] != 0x00)
524 return 0;
525 bits_checked += 8;
527 #ifdef DEBUG
528 LogBabblePrintf ("compress_xz::bid_xz: success: %d\n", bits_checked);
529 #endif
530 return (bits_checked);
534 compress_xz::bid_lzma (void * buffer, size_t len)
536 const unsigned char *buf;
537 uint32_t dicsize;
538 uint64_t uncompressed_size;
539 int bits_checked;
541 if (len < 14)
543 /* not enough peek'ed data in buffer */
544 return 0;
546 buf = (unsigned char *)buffer;
548 /* First byte of raw LZMA stream is commonly 0x5d.
549 * The first byte is a special number, which consists of
550 * three parameters of LZMA compression, a number of literal
551 * context bits(which is from 0 to 8, default is 3), a number
552 * of literal pos bits(which is from 0 to 4, default is 0),
553 * a number of pos bits(which is from 0 to 4, default is 2).
554 * The first byte is made by
555 * (pos bits * 5 + literal pos bit) * 9 + * literal contest bit,
556 * and so the default value in this field is
557 * (2 * 5 + 0) * 9 + 3 = 0x5d.
558 * lzma of LZMA SDK has options to change those parameters.
559 * It means a range of this field is from 0 to 224. And lzma of
560 * XZ Utils with option -e records 0x5e in this field. */
561 /* NOTE: If this checking of the first byte increases false
562 * recognition, we should allow only 0x5d and 0x5e for the first
563 * byte of LZMA stream. */
564 bits_checked = 0;
565 if (buf[0] > (4 * 5 + 4) * 9 + 8)
566 return 0;
567 /* Most likely value in the first byte of LZMA stream. */
568 if (buf[0] == 0x5d || buf[0] == 0x5e)
569 bits_checked += 8;
571 /* Sixth through fourteenth bytes are uncompressed size,
572 * stored in little-endian order. `-1' means uncompressed
573 * size is unknown and lzma of XZ Utils always records `-1'
574 * in this field. */
575 uncompressed_size = le64dec(buf+5);
576 if (uncompressed_size == (uint64_t)(-1))
577 bits_checked += 64;
579 /* Second through fifth bytes are dictionary size, stored in
580 * little-endian order. The minimum dictionary size is
581 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
582 * -d12 and the maxinam dictionary size is 1 << 27(128MiB)
583 * which the one uses with option -d27.
584 * NOTE: A comment of LZMA SDK source code says this dictionary
585 * range is from 1 << 12 to 1 << 30. */
586 dicsize = le32dec(buf+1);
587 switch (dicsize)
589 case 0x00001000:/* lzma of LZMA SDK option -d12. */
590 case 0x00002000:/* lzma of LZMA SDK option -d13. */
591 case 0x00004000:/* lzma of LZMA SDK option -d14. */
592 case 0x00008000:/* lzma of LZMA SDK option -d15. */
593 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
594 * lzma of LZMA SDK option -d16. */
595 case 0x00020000:/* lzma of LZMA SDK option -d17. */
596 case 0x00040000:/* lzma of LZMA SDK option -d18. */
597 case 0x00080000:/* lzma of XZ Utils option -2.
598 * lzma of LZMA SDK option -d19. */
599 case 0x00100000:/* lzma of XZ Utils option -3.
600 * lzma of LZMA SDK option -d20. */
601 case 0x00200000:/* lzma of XZ Utils option -4.
602 * lzma of LZMA SDK option -d21. */
603 case 0x00400000:/* lzma of XZ Utils option -5.
604 * lzma of LZMA SDK option -d22. */
605 case 0x00800000:/* lzma of XZ Utils option -6.
606 * lzma of LZMA SDK option -d23. */
607 case 0x01000000:/* lzma of XZ Utils option -7.
608 * lzma of LZMA SDK option -d24. */
609 case 0x02000000:/* lzma of XZ Utils option -8.
610 * lzma of LZMA SDK option -d25. */
611 case 0x04000000:/* lzma of XZ Utils option -9.
612 * lzma of LZMA SDK option -d26. */
613 case 0x08000000:/* lzma of LZMA SDK option -d27. */
614 bits_checked += 32;
615 break;
616 default:
617 /* If a memory usage for encoding was not enough on
618 * the platform where LZMA stream was made, lzma of
619 * XZ Utils automatically decreased the dictionary
620 * size to enough memory for encoding by 1Mi bytes
621 * (1 << 20).*/
622 if (dicsize <= 0x03F00000 && dicsize >= 0x00300000
623 && (dicsize & ((1 << 20)-1)) == 0
624 && bits_checked == 8 + 64)
626 bits_checked += 32;
627 break;
629 /* Otherwise dictionary size is unlikely. But it is
630 * possible that someone makes lzma stream with
631 * liblzma/LZMA SDK in one's dictionary size. */
632 return 0;
635 /* TODO: The above test is still very weak. It would be
636 * good to do better. */
637 #ifdef DEBUG
638 LogBabblePrintf ("compress_xz::bid_lzma: success: %d\n", bits_checked);
639 #endif
640 return (bits_checked);