Regenerate Japanese resources
[cygwin-setup.git] / compress_xz.cc
blobb91c8a44ab9721e819f511e3943ea83823d961ce
1 /*
2 * Copyright (c) 2008, Charles Wilson
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * A copy of the GNU General Public License can be found at
10 * http://www.gnu.org/
12 * Written by Charles Wilson <cygwin@cygwin.com>
14 * Portions of bid_xz() and bid_lzma() adapted from the libarchive
15 * archive_read_support_compression_xz.c functions xz_bidder_bid()
16 * and lzma_bidder_bid(), which are under a BSD license (reproduced
17 * below).
20 #include "compress_xz.h"
21 #include "LogSingleton.h"
23 #include <stdexcept>
25 #include <errno.h>
26 #include <memory.h>
27 #include <malloc.h>
/*
 * Decode a 32-bit little-endian unsigned integer from the four bytes
 * starting at pp.  The bytes are read one at a time, so pp does not need
 * to be aligned.
 */
static inline uint32_t
le32dec(const void *pp)
{
  unsigned char const *p = (unsigned char const *)pp;

  /* Widen every byte to uint32_t before shifting.  An unsigned char is
   * promoted to (signed) int, and shifting a value >= 0x80 left by 24
   * would move a bit into the sign position. */
  return (((uint32_t)p[3] << 24)
	  | ((uint32_t)p[2] << 16)
	  | ((uint32_t)p[1] << 8)
	  | (uint32_t)p[0]);
}
36 static inline uint64_t
37 le64dec(const void *pp)
39 unsigned char const *p = (unsigned char const *)pp;
40 return (((uint64_t)le32dec(p + 4) << 32) | le32dec(p));
44 * Predicate: the stream is open for read.
46 compress_xz::compress_xz (io_stream * parent)
48 original(NULL),
49 owns_original(true),
50 peeklen(0),
51 lasterr(0),
52 compression_type (COMPRESSION_UNKNOWN)
54 /* read only */
55 if (!parent || parent->error())
57 lasterr = EBADF;
58 return;
60 original = parent;
62 create ();
63 init_decoder ();
66 void
67 compress_xz::create ()
69 unsigned char * out_block = NULL;
70 unsigned char * in_block = NULL;
72 state = (struct private_data *)calloc(sizeof(*state), 1);
73 out_block = (unsigned char *)malloc(out_block_size);
74 in_block = (unsigned char *)malloc(in_block_size);
75 if (state == NULL || out_block == NULL || in_block == NULL)
77 free(out_block);
78 free(in_block);
79 free(state);
80 lasterr = ENOMEM;
81 return;
84 memset(&(state->stream), 0x00, sizeof(state->stream));
85 state->out_block_size = out_block_size;
86 state->out_block = out_block;
87 state->in_block_size = in_block_size;
88 state->in_block = in_block;
89 state->out_p = state->out_block;
90 state->stream.avail_in = 0;
91 state->stream.next_out = state->out_block;
92 state->stream.avail_out = state->out_block_size;
95 ssize_t
96 compress_xz::read (void *buffer, size_t len)
98 if ( compression_type != COMPRESSION_XZ
99 && compression_type != COMPRESSION_LZMA)
101 return -1;
104 /* there is no recovery from a busted stream */
105 if (this->lasterr)
107 return -1;
109 if (len == 0)
111 return 0;
114 /* peekbuf is layered on top of existing buffering code */
115 if (this->peeklen)
117 ssize_t tmplen = std::min (this->peeklen, len);
118 this->peeklen -= tmplen;
119 memcpy (buffer, this->peekbuf, tmplen);
120 memmove (this->peekbuf, this->peekbuf + tmplen, sizeof(this->peekbuf) - tmplen);
121 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
122 if (tmpread >= 0)
123 return tmpread + tmplen;
124 else
125 return tmpread;
128 if (state->out_p < state->out_block + state->out_pos)
129 /* out_p - out_block < out_pos, but avoid sign/unsigned warning */
131 ssize_t tmplen = std::min ((size_t)(state->out_block + state->out_pos - state->out_p), len);
132 memcpy (buffer, state->out_p, tmplen);
133 state->out_p += tmplen;
134 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
135 if (tmpread >= 0)
136 return tmpread + tmplen;
137 else
138 return tmpread;
141 size_t lenRemaining = len;
142 unsigned char * bufp = (unsigned char *)buffer;
143 size_t avail_in = 0;
144 size_t avail_out = 0;
145 size_t decompressed = 0;
146 size_t consumed = 0;
147 /* if we made it here, any existing uncompressed data in out_block
148 * has been consumed, so reset out_p and out_pos
150 state->out_p = state->out_block;
151 state->out_pos = 0;
154 if (state->in_pos == state->in_size)
156 /* no compressed data ready; read some more */
157 state->in_size = (size_t) this->original->read(state->in_block, state->in_block_size);
158 /* We don't care for error vs EOF */
159 if (state->in_size < 0)
160 state->in_size = 0;
161 state->in_pos = 0;
164 avail_in = state->in_size - state->in_pos; /* will be 0 if EOF */
165 avail_out = state->out_block_size - state->out_pos;
167 state->stream.next_out = state->out_block + state->out_pos;
168 state->stream.avail_out = avail_out;
169 state->stream.next_in = state->in_block + state->in_pos;
170 state->stream.avail_in = avail_in;
172 lzma_ret res = lzma_code (&(state->stream),
173 (state->stream.avail_in == 0) ? LZMA_FINISH : LZMA_RUN);
175 consumed = avail_in - state->stream.avail_in;
176 decompressed = avail_out - state->stream.avail_out;
178 state->in_pos += consumed;
179 state->out_pos += decompressed;
181 ssize_t tmplen = std::min (decompressed, lenRemaining);
182 memcpy (bufp, state->out_p, tmplen);
183 state->out_p += tmplen;
184 bufp += tmplen;
185 lenRemaining -= tmplen;
186 state->total_out += decompressed;
187 state->total_in += consumed;
189 switch (res)
191 case LZMA_STREAM_END: /* Found end of stream. */
192 state->eof = 1;
193 /* FALL THROUGH */
194 case LZMA_OK: /* Decompressor made some progress. */
195 break;
196 case LZMA_MEM_ERROR:
197 LogPlainPrintf ("Lzma library error: Cannot allocate memory\n");
198 this->lasterr = ENOMEM;
199 return -1;
200 case LZMA_MEMLIMIT_ERROR:
201 LogPlainPrintf ("Lzma library error: Out of memory\n");
202 this->lasterr = ENOMEM;
203 return -1;
204 case LZMA_FORMAT_ERROR:
205 LogPlainPrintf ("Lzma library error: format not recognized\n");
206 this->lasterr = EINVAL;
207 return -1;
208 case LZMA_OPTIONS_ERROR:
209 LogPlainPrintf ("Lzma library error: Invalid options\n");
210 this->lasterr = EINVAL;
211 return -1;
212 case LZMA_DATA_ERROR:
213 LogPlainPrintf ("Lzma library error: Corrupted input data\n");
214 this->lasterr = EINVAL;
215 return -1;
216 case LZMA_BUF_ERROR:
217 LogPlainPrintf ("Lzma library error: No progress is possible\n");
218 this->lasterr = EINVAL;
219 return -1;
220 case LZMA_PROG_ERROR:
221 LogPlainPrintf ("Lzma library error: Internal error\n");
222 this->lasterr = EINVAL;
223 return -1;
224 default:
225 LogPlainPrintf ("Lzma decompression failed: Unknown error %d\n", res);
226 this->lasterr = EINVAL;
227 return -1;
230 while (lenRemaining != 0 && !state->eof);
232 return (len - lenRemaining);
235 ssize_t
236 compress_xz::write (const void *buffer, size_t len)
238 throw new std::logic_error("compress_xz::write is not implemented");
241 ssize_t
242 compress_xz::peek (void *buffer, size_t len)
244 /* can only peek 512 bytes */
245 if (len > 512)
246 return ENOMEM;
248 if (len > this->peeklen)
250 size_t want = len - this->peeklen;
251 ssize_t got = read (&(this->peekbuf[peeklen]), want);
252 if (got >= 0)
253 this->peeklen += got;
254 else
255 /* error */
256 return got;
257 /* we may have read less than requested. */
258 memcpy (buffer, this->peekbuf, this->peeklen);
259 return this->peeklen;
261 else
263 memcpy (buffer, this->peekbuf, len);
264 return len;
266 return 0;
269 off_t
270 compress_xz::tell ()
272 throw new std::logic_error("compress_xz::tell is not implemented");
275 off_t
276 compress_xz::seek (off_t where, io_stream_seek_t whence)
278 if ((whence == IO_SEEK_SET) && (where == 0))
280 off_t result = original->seek(where, whence);
281 destroy ();
282 peeklen = 0;
283 lasterr = 0;
284 create ();
285 init_decoder ();
286 return result;
289 throw new std::logic_error("compress_xz::seek is not implemented");
293 compress_xz::error ()
295 return lasterr;
299 compress_xz::set_mtime (time_t mtime)
301 if (original)
302 return original->set_mtime (mtime);
303 return 1;
306 time_t
307 compress_xz::get_mtime ()
309 if (original)
310 return original->get_mtime ();
311 return 0;
314 mode_t
315 compress_xz::get_mode ()
317 if (original)
318 return original->get_mode ();
319 return 0;
322 void
323 compress_xz::release_original ()
325 owns_original = false;
328 void
329 compress_xz::destroy ()
331 if (state)
333 if ( compression_type == COMPRESSION_XZ
334 || compression_type == COMPRESSION_LZMA)
336 lzma_end(&(state->stream));
339 if (state->out_block)
341 free (state->out_block);
342 state->out_block = NULL;
345 if (state->in_block)
347 free (state->in_block);
348 state->in_block = NULL;
351 free(state);
352 state = NULL;
354 compression_type = COMPRESSION_UNKNOWN;
358 compress_xz::~compress_xz ()
360 destroy ();
362 if (original && owns_original)
363 delete original;
366 /* ===========================================================================
367 * Check the header of a lzma_stream opened for reading, and initialize
368 * the appropriate decoder (xz or lzma).
369 * IN assertion:
370 * the stream has already been created sucessfully
371 * this method is called only once per stream
372 * OUT assertion - success:
373 * compression_type is set to COMPRESSION_XZ or COMPRESSION_LZMA
374 * state->stream is initialized with the appropriate decoder
375 * lzma: the first 14 bytes of the stream are read (+ whatever
376 * the decoder itself consumes on initialization)
377 * xz: the first 6 bytes of the stram are read (+ whatever the
378 * decoder itself consumes on initialization)
379 * last_error is zero
380 * OUT assertion - error:
381 * last_error is non-zero
383 void
384 compress_xz::init_decoder (void)
386 unsigned char buf[14];
387 int ret;
388 this->compression_type = COMPRESSION_UNKNOWN;
390 /* read properties */
391 if (this->original->peek (buf, 6) != 6)
393 this->lasterr = (errno ? errno : EIO);
394 return;
397 if (bid_xz ((void *)buf, 6) > 0)
399 this->compression_type = COMPRESSION_XZ;
401 else
403 if (this->original->peek (buf + 6, 8) != 8)
405 this->lasterr = (errno ? errno : EIO);
406 return;
408 if (bid_lzma ((void *)buf, 14) > 0)
410 this->compression_type = COMPRESSION_LZMA;
414 switch (compression_type)
416 case COMPRESSION_XZ:
417 ret = lzma_stream_decoder (&(state->stream),
418 (1U << 30),/* memlimit */
419 LZMA_CONCATENATED);
420 break;
421 case COMPRESSION_LZMA:
422 ret = lzma_alone_decoder (&(state->stream),
423 (1U << 30));/* memlimit */
424 break;
425 default:
426 this->lasterr = EINVAL;
427 return;
430 switch (ret)
432 case LZMA_OK:
433 break;
434 case LZMA_MEM_ERROR:
435 this->lasterr = ENOMEM;
436 break;
437 case LZMA_OPTIONS_ERROR:
438 this->lasterr = EINVAL;
439 break;
440 default:
441 this->lasterr = EINVAL;
442 break;
446 bool
447 compress_xz::is_xz_or_lzma (void * buffer, size_t len)
449 int bits_checked_xz;
450 int bits_checked_lzma;
452 bits_checked_xz = bid_xz (buffer, len);
453 if (bits_checked_xz)
454 return true;
456 bits_checked_lzma = bid_lzma (buffer, len);
457 if (bits_checked_lzma)
458 return true;
460 return false;
464 * Portions of bid_xz() and bid_lzma() have been adapted from the
465 * libarchive archive_read_support_compression_xz.c functions
466 * xz_bidder_bid() and lzma_bidder_bid(), which were released under
467 * the 2-clause (simplified) BSD license, reproduced below.
469 * (modifications for setup.exe) Copyright (c) 2010 Charles Wilson
470 * Copyright (c) 2009 Michihiro NAKAJIMA
471 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
472 * All rights reserved.
474 * Redistribution and use in source and binary forms, with or without
475 * modification, are permitted provided that the following conditions
476 * are met:
477 * 1. Redistributions of source code must retain the above copyright
478 * notice, this list of conditions and the following disclaimer.
479 * 2. Redistributions in binary form must reproduce the above copyright
480 * notice, this list of conditions and the following disclaimer in the
481 * documentation and/or other materials provided with the distribution.
483 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
484 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
485 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
486 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
487 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
488 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
489 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
490 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
491 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
492 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
495 compress_xz::bid_xz (void * buffer, size_t len)
497 const unsigned char *buf;
498 int bits_checked;
500 buf = (const unsigned char *)buffer;
501 if (len < 6)
503 /* not enough peek'ed data in buf */
504 return 0;
508 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
510 bits_checked = 0;
511 if (buf[0] != 0xFD)
512 return 0;
513 bits_checked += 8;
514 if (buf[1] != 0x37)
515 return 0;
516 bits_checked += 8;
517 if (buf[2] != 0x7A)
518 return 0;
519 bits_checked += 8;
520 if (buf[3] != 0x58)
521 return 0;
522 bits_checked += 8;
523 if (buf[4] != 0x5A)
524 return 0;
525 bits_checked += 8;
526 if (buf[5] != 0x00)
527 return 0;
528 bits_checked += 8;
530 #ifdef DEBUG
531 LogBabblePrintf ("compress_xz::bid_xz: success: %d\n", bits_checked);
532 #endif
533 return (bits_checked);
537 compress_xz::bid_lzma (void * buffer, size_t len)
539 const unsigned char *buf;
540 uint32_t dicsize;
541 uint64_t uncompressed_size;
542 int bits_checked;
544 if (len < 14)
546 /* not enough peek'ed data in buffer */
547 return 0;
549 buf = (unsigned char *)buffer;
551 /* First byte of raw LZMA stream is commonly 0x5d.
552 * The first byte is a special number, which consists of
553 * three parameters of LZMA compression, a number of literal
554 * context bits(which is from 0 to 8, default is 3), a number
555 * of literal pos bits(which is from 0 to 4, default is 0),
556 * a number of pos bits(which is from 0 to 4, default is 2).
557 * The first byte is made by
558 * (pos bits * 5 + literal pos bit) * 9 + * literal contest bit,
559 * and so the default value in this field is
560 * (2 * 5 + 0) * 9 + 3 = 0x5d.
561 * lzma of LZMA SDK has options to change those parameters.
562 * It means a range of this field is from 0 to 224. And lzma of
563 * XZ Utils with option -e records 0x5e in this field. */
564 /* NOTE: If this checking of the first byte increases false
565 * recognition, we should allow only 0x5d and 0x5e for the first
566 * byte of LZMA stream. */
567 bits_checked = 0;
568 if (buf[0] > (4 * 5 + 4) * 9 + 8)
569 return 0;
570 /* Most likely value in the first byte of LZMA stream. */
571 if (buf[0] == 0x5d || buf[0] == 0x5e)
572 bits_checked += 8;
574 /* Sixth through fourteenth bytes are uncompressed size,
575 * stored in little-endian order. `-1' means uncompressed
576 * size is unknown and lzma of XZ Utils always records `-1'
577 * in this field. */
578 uncompressed_size = le64dec(buf+5);
579 if (uncompressed_size == (uint64_t)(-1))
580 bits_checked += 64;
582 /* Second through fifth bytes are dictionary size, stored in
583 * little-endian order. The minimum dictionary size is
584 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
585 * -d12 and the maxinam dictionary size is 1 << 27(128MiB)
586 * which the one uses with option -d27.
587 * NOTE: A comment of LZMA SDK source code says this dictionary
588 * range is from 1 << 12 to 1 << 30. */
589 dicsize = le32dec(buf+1);
590 switch (dicsize)
592 case 0x00001000:/* lzma of LZMA SDK option -d12. */
593 case 0x00002000:/* lzma of LZMA SDK option -d13. */
594 case 0x00004000:/* lzma of LZMA SDK option -d14. */
595 case 0x00008000:/* lzma of LZMA SDK option -d15. */
596 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
597 * lzma of LZMA SDK option -d16. */
598 case 0x00020000:/* lzma of LZMA SDK option -d17. */
599 case 0x00040000:/* lzma of LZMA SDK option -d18. */
600 case 0x00080000:/* lzma of XZ Utils option -2.
601 * lzma of LZMA SDK option -d19. */
602 case 0x00100000:/* lzma of XZ Utils option -3.
603 * lzma of LZMA SDK option -d20. */
604 case 0x00200000:/* lzma of XZ Utils option -4.
605 * lzma of LZMA SDK option -d21. */
606 case 0x00400000:/* lzma of XZ Utils option -5.
607 * lzma of LZMA SDK option -d22. */
608 case 0x00800000:/* lzma of XZ Utils option -6.
609 * lzma of LZMA SDK option -d23. */
610 case 0x01000000:/* lzma of XZ Utils option -7.
611 * lzma of LZMA SDK option -d24. */
612 case 0x02000000:/* lzma of XZ Utils option -8.
613 * lzma of LZMA SDK option -d25. */
614 case 0x04000000:/* lzma of XZ Utils option -9.
615 * lzma of LZMA SDK option -d26. */
616 case 0x08000000:/* lzma of LZMA SDK option -d27. */
617 bits_checked += 32;
618 break;
619 default:
620 /* If a memory usage for encoding was not enough on
621 * the platform where LZMA stream was made, lzma of
622 * XZ Utils automatically decreased the dictionary
623 * size to enough memory for encoding by 1Mi bytes
624 * (1 << 20).*/
625 if (dicsize <= 0x03F00000 && dicsize >= 0x00300000
626 && (dicsize & ((1 << 20)-1)) == 0
627 && bits_checked == 8 + 64)
629 bits_checked += 32;
630 break;
632 /* Otherwise dictionary size is unlikely. But it is
633 * possible that someone makes lzma stream with
634 * liblzma/LZMA SDK in one's dictionary size. */
635 return 0;
638 /* TODO: The above test is still very weak. It would be
639 * good to do better. */
640 #ifdef DEBUG
641 LogBabblePrintf ("compress_xz::bid_lzma: success: %d\n", bits_checked);
642 #endif
643 return (bits_checked);