2 * Copyright (c) 2008 Joerg Sonnenberger
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * Copyright (c) 1985, 1986, 1992, 1993
28 * The Regents of the University of California. All rights reserved.
30 * This code is derived from software contributed to Berkeley by
31 * Diomidis Spinellis and James A. Woods, derived from original
32 * work by Spencer Thomas and Joseph Orost.
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 #include "archive_platform.h"
61 __FBSDID("$FreeBSD: src/lib/libarchive/archive_write_set_compression_compress.c,v 1.1 2008/03/14 20:35:37 kientzle Exp $");
74 #include "archive_private.h"
75 #include "archive_write_private.h"
/*
 * Table geometry and tuning constants for the compressor.
 * HSIZE is the element count of the codetab array in the state struct,
 * HSHIFT is the shift applied to the input byte when hashing, CHECK_GAP
 * is added to in_count to schedule the next ratio check, FIRST seeds
 * first_free, and CLEAR is the code emitted after the table is reset.
 */
77 #define HSIZE 69001 /* 95% occupancy */
78 #define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */
79 #define CHECK_GAP 10000 /* Ratio check interval. */
/* All-ones mask of width `bits`, i.e. the largest value that fits in `bits` bits. */
81 #define MAXCODE(bits) ((1 << (bits)) - 1)
84 * the next two codes should not be changed lightly, as they must not
85 * lie within the contiguous general code space.
87 #define FIRST 257 /* First free entry. */
88 #define CLEAR 256 /* Table clear output code. */
/*
 * Compressor state fields (members of struct private_data, the type the
 * functions in this file obtain from a->compressor.data).
 * NOTE(review): the struct's opening and closing lines are not visible in
 * this view, and neither are declarations for hashtab, compress_ratio and
 * bit_offset, which the functions below use -- confirm against the full file.
 */
/*
 * in_count: length of input; out_count: output length, initialised to 3
 * to account for the header; checkpoint: in_count value below which the
 * ratio check is skipped.
 */
91 off_t in_count
, out_count
, checkpoint
;
93 int code_len
; /* Number of bits/code. */
94 int cur_maxcode
; /* Maximum code, given n_bits. */
95 int max_maxcode
; /* Should NEVER generate this code. */
/* Code stored for each hash slot; indexed with the same slot index as hashtab. */
97 unsigned short codetab
[HSIZE
];
98 int first_free
; /* First unused entry. */
/*
 * cur_code: the code passed to output_code() when a code is emitted;
 * cur_fcode: lookup key compared against hashtab entries.
 */
101 int cur_code
, cur_fcode
;
/* Leftover bits of the last code that have not filled a whole output byte. */
104 unsigned char bit_buf
;
/* Output block buffer, its size in bytes, and the index of the next byte to fill. */
106 unsigned char *compressed
;
107 size_t compressed_buffer_size
;
108 size_t compressed_offset
;
/*
 * Forward declarations of the compressor callbacks. The init callback is
 * registered by archive_write_set_compression_compress(); init in turn
 * registers the write and finish callbacks in a->compressor.
 */
111 static int archive_compressor_compress_finish(struct archive_write
*);
112 static int archive_compressor_compress_init(struct archive_write
*);
113 static int archive_compressor_compress_write(struct archive_write
*,
114 const void *, size_t);
117 * Allocate, initialize and return an archive object.
120 archive_write_set_compression_compress(struct archive
*_a
)
/*
 * Selects "compress" compression for a write handle.
 * The generic struct archive pointer is cast to struct archive_write, the
 * handle is checked with __archive_check_magic() against the write magic
 * and the NEW state, and then the init callback plus the compression
 * code/name are recorded on the handle. No allocation happens here.
 * NOTE(review): the return type, braces and return statement are not
 * visible in this view.
 */
122 struct archive_write
*a
= (struct archive_write
*)_a
;
123 __archive_check_magic(&a
->archive
, ARCHIVE_WRITE_MAGIC
,
124 ARCHIVE_STATE_NEW
, "archive_write_set_compression_compress");
/* Setup is deferred: only the init callback is registered at this point. */
125 a
->compressor
.init
= &archive_compressor_compress_init
;
126 a
->archive
.compression_code
= ARCHIVE_COMPRESSION_COMPRESS
;
127 a
->archive
.compression_name
= "compress";
/*
 * Init callback: prepares the compressor state for a write handle.
 *
 * Steps visible here: record the compression code/name, reject block sizes
 * below 4 bytes, call the client opener if one is set, allocate and zero
 * the state, allocate an output buffer of bytes_per_block bytes, register
 * the write/finish callbacks, initialise the counters and code table, and
 * place the 3-byte header at the start of the output buffer.
 *
 * Returns ARCHIVE_FATAL (with an error set on the archive) for a too-small
 * block size or a failed allocation.
 * NOTE(review): the return type, the declaration of `ret`, several
 * conditions/returns, closing braces and the final return are not visible
 * in this view.
 */
135 archive_compressor_compress_init(struct archive_write
*a
)
138 struct private_data
*state
;
140 a
->archive
.compression_code
= ARCHIVE_COMPRESSION_COMPRESS
;
141 a
->archive
.compression_name
= "compress";
/*
 * The header written below occupies the first 3 bytes of the block
 * buffer, so block sizes below 4 bytes are rejected.
 */
143 if (a
->bytes_per_block
< 4) {
144 archive_set_error(&a
->archive
, EINVAL
,
145 "Can't write Compress header as single block");
146 return (ARCHIVE_FATAL
);
/* Give the client a chance to open its output before any state is allocated. */
149 if (a
->client_opener
!= NULL
) {
150 ret
= (a
->client_opener
)(&a
->archive
, a
->client_data
);
151 if (ret
!= ARCHIVE_OK
)
/* Allocate the state; the ENOMEM error below belongs to its failure path. */
155 state
= (struct private_data
*)malloc(sizeof(*state
));
157 archive_set_error(&a
->archive
, ENOMEM
,
158 "Can't allocate data for compression");
159 return (ARCHIVE_FATAL
);
161 memset(state
, 0, sizeof(*state
));
/* The output buffer holds exactly one client block. */
163 state
->compressed_buffer_size
= a
->bytes_per_block
;
164 state
->compressed
= malloc(state
->compressed_buffer_size
);
/*
 * NOTE(review): one line between the error call and the return is not
 * visible here -- confirm that `state` is released on this failure path.
 */
166 if (state
->compressed
== NULL
) {
167 archive_set_error(&a
->archive
, ENOMEM
,
168 "Can't allocate data for compression buffer");
170 return (ARCHIVE_FATAL
);
173 a
->compressor
.write
= archive_compressor_compress_write
;
174 a
->compressor
.finish
= archive_compressor_compress_finish
;
176 state
->max_maxcode
= 0x10000; /* Should NEVER generate this code. */
177 state
->in_count
= 0; /* Length of input. */
179 state
->bit_offset
= 0;
180 state
->out_count
= 3; /* Includes 3-byte header mojo. */
181 state
->compress_ratio
= 0;
182 state
->checkpoint
= CHECK_GAP
;
/*
 * NOTE(review): cur_maxcode is derived from code_len, but no assignment to
 * code_len after the memset above is visible in this view -- confirm the
 * initial code width is set before this line.
 */
184 state
->cur_maxcode
= MAXCODE(state
->code_len
);
185 state
->first_free
= FIRST
;
/* Fill the hash table with 0xff bytes; the write routine treats negative entries as empty slots. */
187 memset(state
->hashtab
, 0xff, sizeof(state
->hashtab
));
189 /* Prime output buffer with the 3-byte compress header (not a gzip header). */
190 state
->compressed
[0] = 0x1f; /* Compress */
191 state
->compressed
[1] = 0x9d;
192 state
->compressed
[2] = 0x90; /* Block mode, 16bit max */
193 state
->compressed_offset
= 3;
/* Publish the state so the write/finish callbacks can find it. */
195 a
->compressor
.data
= state
;
200 * Output the given code.
202 * code: An n_bits-bit integer. If == -1, then EOF. This assumes
203 * that n_bits <= (long)wordsize - 1.
205 * Outputs code to the file.
207 * Chars are 8 bits long.
209 * Maintain a BITS character long buffer (so that 8 codes will
210 * fit in it exactly). Use the VAX insv instruction to insert each
211 * code in turn. When the buffer fills up, empty it and start over.
214 static unsigned char rmask
[9] =
/* rmask[n] has the low n bits set (n = 0..8); output_code() uses it to keep a code's leftover low bits. */
215 {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
/*
 * Appends one byte to the output block buffer and, when the buffer becomes
 * full, hands the whole buffer to the client writer.
 *
 * a: write handle whose compressor.data holds the state.
 * c: byte to append.
 * Returns ARCHIVE_FATAL if the client writer reports <= 0 bytes written.
 * NOTE(review): the return type, at least one statement, part of the writer
 * call's argument list, closing braces and the success return are not
 * visible in this view.
 */
218 output_byte(struct archive_write
*a
, unsigned char c
)
220 struct private_data
*state
= a
->compressor
.data
;
221 ssize_t bytes_written
;
223 state
->compressed
[state
->compressed_offset
++] = c
;
/* Buffer full: flush it as one block of compressed_buffer_size bytes. */
226 if (state
->compressed_buffer_size
== state
->compressed_offset
) {
227 bytes_written
= (a
->client_writer
)(&a
->archive
,
229 state
->compressed
, state
->compressed_buffer_size
);
230 if (bytes_written
<= 0)
231 return ARCHIVE_FATAL
;
/* Account for what the client accepted and start refilling from index 0. */
232 a
->archive
.raw_position
+= bytes_written
;
233 state
->compressed_offset
= 0;
/*
 * Emits one code of state->code_len bits into the byte stream.
 *
 * The low bits of the code are OR-ed into bit_buf at the current bit
 * position and that byte is written; remaining whole bytes are written
 * next; the final leftover bits are kept in bit_buf for the next call.
 * bit_offset counts bits within a group of 8 codes and wraps to 0 when it
 * reaches code_len * 8.
 *
 * When the code is CLEAR, or first_free has passed cur_maxcode, the rest of
 * the current group is padded out byte by byte and cur_maxcode is
 * recomputed from code_len (max_maxcode is used once code_len is 16).
 *
 * a: write handle; ocode: the code to emit.
 * NOTE(review): the return type, the statements that change code_len, the
 * branch structure around the cur_maxcode updates, closing braces and the
 * returns are not visible in this view.
 */
240 output_code(struct archive_write
*a
, int ocode
)
242 struct private_data
*state
= a
->compressor
.data
;
243 int bits
, ret
, clear_flg
, bit_offset
;
245 clear_flg
= ocode
== CLEAR
;
246 bits
= state
->code_len
;
249 * Since ocode is always >= 8 bits, only need to mask the first
252 bit_offset
= state
->bit_offset
% 8;
/* Merge the code's low bits into the pending partial byte. */
253 state
->bit_buf
|= (ocode
<< bit_offset
) & 0xff;
/* NOTE(review): the result of output_byte() is ignored here, unlike in the padding loop further down. */
254 output_byte(a
, state
->bit_buf
);
/* Bits of the code still to be written after the first byte. */
256 bits
= state
->code_len
- (8 - bit_offset
);
257 ocode
>>= 8 - bit_offset
;
258 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
/* NOTE(review): the result of output_byte() is ignored here as well. */
260 output_byte(a
, ocode
& 0xff);
265 state
->bit_offset
+= state
->code_len
;
/* Keep only the leftover low `bits` bits for the next call. */
266 state
->bit_buf
= ocode
& rmask
[bits
];
267 if (state
->bit_offset
== state
->code_len
* 8)
268 state
->bit_offset
= 0;
271 * If the next entry is going to be too big for the ocode size,
272 * then increase it, if possible.
274 if (clear_flg
|| state
->first_free
> state
->cur_maxcode
) {
276 * Write the whole buffer, because the input side won't
277 * discover the size increase until after it has read it.
279 if (state
->bit_offset
> 0) {
280 while (state
->bit_offset
< state
->code_len
* 8) {
281 ret
= output_byte(a
, state
->bit_buf
);
282 if (ret
!= ARCHIVE_OK
)
284 state
->bit_offset
+= 8;
289 state
->bit_offset
= 0;
293 state
->cur_maxcode
= MAXCODE(state
->code_len
);
296 if (state
->code_len
== 16)
297 state
->cur_maxcode
= state
->max_maxcode
;
299 state
->cur_maxcode
= MAXCODE(state
->code_len
);
/*
 * Writes out the pending partial byte, if any, at end of stream.
 *
 * a: write handle whose compressor.data holds the state.
 * NOTE(review): the return type, the declaration of `ret`, closing braces
 * and the return statements are not visible in this view.
 */
307 output_flush(struct archive_write
*a
)
309 struct private_data
*state
= a
->compressor
.data
;
312 /* At EOF, write the rest of the buffer. */
313 if (state
->bit_offset
% 8) {
/*
 * NOTE(review): for a positive bit_offset this expression always yields 1,
 * so code_len is overwritten with 1 here; the purpose is not evident from
 * the visible code -- confirm it is intentional.
 */
314 state
->code_len
= (state
->bit_offset
% 8 + 7) / 8;
315 ret
= output_byte(a
, state
->bit_buf
);
316 if (ret
!= ARCHIVE_OK
)
324 * Write data to the compressed stream.
327 archive_compressor_compress_write(struct archive_write
*a
, const void *buff
,
/*
 * Write callback: compresses input bytes into the output stream.
 *
 * Fails with ARCHIVE_FATAL if no client writer is registered. The first
 * input byte ever seen (in_count == 0) seeds cur_code. For each further
 * byte c, the key cur_fcode = (c << 16) + cur_code is looked up in
 * hashtab: on a match cur_code becomes the stored codetab entry; otherwise
 * cur_code is emitted with output_code() and, while first_free is below
 * max_maxcode, the new key/code pair is stored in the probed slot.
 * The trailing code samples the compression ratio once in_count reaches
 * checkpoint and resets the table, emitting CLEAR.
 *
 * NOTE(review): the remaining parameter, several local declarations, the
 * loop header, the secondary-probe arithmetic, gotos/continues, closing
 * braces and the return are not visible in this view; the control flow
 * described above is inferred from the visible statements and should be
 * confirmed against the full file.
 */
330 struct private_data
*state
;
334 const unsigned char *bp
;
336 state
= (struct private_data
*)a
->compressor
.data
;
337 if (a
->client_writer
== NULL
) {
338 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_PROGRAMMER
,
339 "No write callback is registered? "
340 "This is probably an internal programming error.");
341 return (ARCHIVE_FATAL
);
/* Very first byte of the stream: it becomes the initial prefix code. */
349 if (state
->in_count
== 0) {
350 state
->cur_code
= *bp
++;
/* Build the lookup key from the new byte and the current prefix code. */
358 state
->cur_fcode
= (c
<< 16) + state
->cur_code
;
359 i
= ((c
<< HSHIFT
) ^ state
->cur_code
); /* Xor hashing. */
/* Primary probe: a hit extends the current string. */
361 if (state
->hashtab
[i
] == state
->cur_fcode
) {
362 state
->cur_code
= state
->codetab
[i
];
365 if (state
->hashtab
[i
] < 0) /* Empty slot. */
367 /* Secondary hash (after G. Knott). */
376 if (state
->hashtab
[i
] == state
->cur_fcode
) {
377 state
->cur_code
= state
->codetab
[i
];
380 if (state
->hashtab
[i
] >= 0)
/* No match: emit the code for the string seen so far. */
383 ret
= output_code(a
, state
->cur_code
);
384 if (ret
!= ARCHIVE_OK
)
/* Record the new string while unused codes remain. */
387 if (state
->first_free
< state
->max_maxcode
) {
388 state
->codetab
[i
] = state
->first_free
++; /* code -> hashtable */
389 state
->hashtab
[i
] = state
->cur_fcode
;
392 if (state
->in_count
< state
->checkpoint
)
395 state
->checkpoint
= state
->in_count
+ CHECK_GAP
;
/*
 * Ratio is in_count / out_count scaled by 256. For large in_count the
 * divisor is scaled down instead of the dividend -- presumably to keep
 * in_count * 256 from overflowing.
 */
397 if (state
->in_count
<= 0x007fffff)
398 ratio
= state
->in_count
* 256 / state
->out_count
;
399 else if ((ratio
= state
->out_count
/ 256) == 0)
402 ratio
= state
->in_count
/ ratio
;
/* Track the best ratio seen so far. */
404 if (ratio
> state
->compress_ratio
)
405 state
->compress_ratio
= ratio
;
/* Reset path (presumably the else branch): forget the table and tell the reader with CLEAR. */
407 state
->compress_ratio
= 0;
408 memset(state
->hashtab
, 0xff, sizeof(state
->hashtab
));
409 state
->first_free
= FIRST
;
410 ret
= output_code(a
, CLEAR
);
411 if (ret
!= ARCHIVE_OK
)
421 * Finish the compression...
424 archive_compressor_compress_finish(struct archive_write
*a
)
/*
 * Finish callback: flushes the compressor and writes the last block.
 *
 * Visible steps: report an error if no client writer is registered;
 * if pad_uncompressed is set, feed null bytes through the write callback
 * until in_count is a multiple of bytes_per_block; emit the pending
 * cur_code and flush the partial byte; zero-pad the output buffer up to
 * the target last-block length; hand the block to the client writer; and
 * free the output buffer.
 *
 * NOTE(review): the return type, the declarations of `ret` and `tocopy`,
 * the error-path jumps, an `else`, closing braces and everything after the
 * free() of the output buffer are not visible in this view.
 */
426 ssize_t block_length
, target_block_length
, bytes_written
;
428 struct private_data
*state
;
431 state
= (struct private_data
*)a
->compressor
.data
;
433 if (a
->client_writer
== NULL
) {
434 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_PROGRAMMER
,
435 "No write callback is registered? "
436 "This is probably an internal programming error.");
441 /* By default, always pad the uncompressed data. */
442 if (a
->pad_uncompressed
) {
443 while (state
->in_count
% a
->bytes_per_block
!= 0) {
/* Bytes still needed to reach the next block boundary, capped by the size of the nulls buffer. */
444 tocopy
= a
->bytes_per_block
-
445 (state
->in_count
% a
->bytes_per_block
);
446 if (tocopy
> a
->null_length
)
447 tocopy
= a
->null_length
;
448 ret
= archive_compressor_compress_write(a
, a
->nulls
,
450 if (ret
!= ARCHIVE_OK
)
/* Emit the code for the string still being matched, then any partial byte. */
455 ret
= output_code(a
, state
->cur_code
);
456 if (ret
!= ARCHIVE_OK
)
458 ret
= output_flush(a
);
459 if (ret
!= ARCHIVE_OK
)
462 /* Optionally, pad the final compressed block. */
463 block_length
= state
->compressed_offset
;
465 /* Tricky calculation to determine size of last block. */
466 if (a
->bytes_in_last_block
<= 0)
467 /* Default or Zero: pad to full block */
468 target_block_length
= a
->bytes_per_block
;
470 /* Round length to next multiple of bytes_in_last_block. */
471 target_block_length
= a
->bytes_in_last_block
*
472 ( (block_length
+ a
->bytes_in_last_block
- 1) /
473 a
->bytes_in_last_block
);
/* Never pad beyond one full block, which is also the size of the output buffer. */
474 if (target_block_length
> a
->bytes_per_block
)
475 target_block_length
= a
->bytes_per_block
;
476 if (block_length
< target_block_length
) {
477 memset(state
->compressed
+ state
->compressed_offset
, 0,
478 target_block_length
- block_length
);
479 block_length
= target_block_length
;
482 /* Write the last block */
483 bytes_written
= (a
->client_writer
)(&a
->archive
, a
->client_data
,
484 state
->compressed
, block_length
);
485 if (bytes_written
<= 0)
488 a
->archive
.raw_position
+= bytes_written
;
491 free(state
->compressed
);