2 * Copyright (c) 2008 Joerg Sonnenberger
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * Copyright (c) 1985, 1986, 1992, 1993
28 * The Regents of the University of California. All rights reserved.
30 * This code is derived from software contributed to Berkeley by
31 * Diomidis Spinellis and James A. Woods, derived from original
32 * work by Spencer Thomas and Joseph Orost.
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 #include "archive_platform.h"
61 __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_compression_compress.c 201111 2009-12-28 03:33:05Z kientzle $");
74 #include "archive_private.h"
75 #include "archive_write_private.h"
77 #define HSIZE 69001 /* 95% occupancy */
78 #define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */
79 #define CHECK_GAP 10000 /* Ratio check interval. */
81 #define MAXCODE(bits) ((1 << (bits)) - 1)
84 * the next two codes should not be changed lightly, as they must not
85 * lie within the contiguous general code space.
87 #define FIRST 257 /* First free entry. */
88 #define CLEAR 256 /* Table clear output code. */
91 int64_t in_count
, out_count
, checkpoint
;
93 int code_len
; /* Number of bits/code. */
94 int cur_maxcode
; /* Maximum code, given n_bits. */
95 int max_maxcode
; /* Should NEVER generate this code. */
97 unsigned short codetab
[HSIZE
];
98 int first_free
; /* First unused entry. */
101 int cur_code
, cur_fcode
;
104 unsigned char bit_buf
;
106 unsigned char *compressed
;
107 size_t compressed_buffer_size
;
108 size_t compressed_offset
;
111 static int archive_compressor_compress_open(struct archive_write_filter
*);
112 static int archive_compressor_compress_write(struct archive_write_filter
*,
113 const void *, size_t);
114 static int archive_compressor_compress_close(struct archive_write_filter
*);
115 static int archive_compressor_compress_free(struct archive_write_filter
*);
117 #if ARCHIVE_VERSION_NUMBER < 4000000
119 archive_write_set_compression_compress(struct archive
*a
)
121 __archive_write_filters_free(a
);
122 return (archive_write_add_filter_compress(a
));
127 * Add a compress filter to this write handle.
130 archive_write_add_filter_compress(struct archive
*_a
)
132 struct archive_write
*a
= (struct archive_write
*)_a
;
133 struct archive_write_filter
*f
= __archive_write_allocate_filter(_a
);
135 archive_check_magic(&a
->archive
, ARCHIVE_WRITE_MAGIC
,
136 ARCHIVE_STATE_NEW
, "archive_write_add_filter_compress");
137 f
->open
= &archive_compressor_compress_open
;
138 f
->code
= ARCHIVE_FILTER_COMPRESS
;
139 f
->name
= "compress";
147 archive_compressor_compress_open(struct archive_write_filter
*f
)
150 struct private_data
*state
;
151 size_t bs
= 65536, bpb
;
153 f
->code
= ARCHIVE_FILTER_COMPRESS
;
154 f
->name
= "compress";
156 ret
= __archive_write_open_filter(f
->next_filter
);
157 if (ret
!= ARCHIVE_OK
)
160 state
= (struct private_data
*)calloc(1, sizeof(*state
));
162 archive_set_error(f
->archive
, ENOMEM
,
163 "Can't allocate data for compression");
164 return (ARCHIVE_FATAL
);
167 if (f
->archive
->magic
== ARCHIVE_WRITE_MAGIC
) {
168 /* Buffer size should be a multiple number of the of bytes
169 * per block for performance. */
170 bpb
= archive_write_get_bytes_per_block(f
->archive
);
176 state
->compressed_buffer_size
= bs
;
177 state
->compressed
= malloc(state
->compressed_buffer_size
);
179 if (state
->compressed
== NULL
) {
180 archive_set_error(f
->archive
, ENOMEM
,
181 "Can't allocate data for compression buffer");
183 return (ARCHIVE_FATAL
);
186 f
->write
= archive_compressor_compress_write
;
187 f
->close
= archive_compressor_compress_close
;
188 f
->free
= archive_compressor_compress_free
;
190 state
->max_maxcode
= 0x10000; /* Should NEVER generate this code. */
191 state
->in_count
= 0; /* Length of input. */
193 state
->bit_offset
= 0;
194 state
->out_count
= 3; /* Includes 3-byte header mojo. */
195 state
->compress_ratio
= 0;
196 state
->checkpoint
= CHECK_GAP
;
198 state
->cur_maxcode
= MAXCODE(state
->code_len
);
199 state
->first_free
= FIRST
;
201 memset(state
->hashtab
, 0xff, sizeof(state
->hashtab
));
203 /* Prime output buffer with a gzip header. */
204 state
->compressed
[0] = 0x1f; /* Compress */
205 state
->compressed
[1] = 0x9d;
206 state
->compressed
[2] = 0x90; /* Block mode, 16bit max */
207 state
->compressed_offset
= 3;
214 * Output the given code.
216 * code: A n_bits-bit integer. If == -1, then EOF. This assumes
217 * that n_bits <= (long)wordsize - 1.
219 * Outputs code to the file.
221 * Chars are 8 bits long.
223 * Maintain a BITS character long buffer (so that 8 codes will
224 * fit in it exactly). Use the VAX insv instruction to insert each
225 * code in turn. When the buffer fills up empty it and start over.
228 static const unsigned char rmask
[9] =
229 {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
232 output_byte(struct archive_write_filter
*f
, unsigned char c
)
234 struct private_data
*state
= f
->data
;
236 state
->compressed
[state
->compressed_offset
++] = c
;
239 if (state
->compressed_buffer_size
== state
->compressed_offset
) {
240 int ret
= __archive_write_filter(f
->next_filter
,
241 state
->compressed
, state
->compressed_buffer_size
);
242 if (ret
!= ARCHIVE_OK
)
243 return ARCHIVE_FATAL
;
244 state
->compressed_offset
= 0;
251 output_code(struct archive_write_filter
*f
, int ocode
)
253 struct private_data
*state
= f
->data
;
254 int bits
, ret
, clear_flg
, bit_offset
;
256 clear_flg
= ocode
== CLEAR
;
259 * Since ocode is always >= 8 bits, only need to mask the first
262 bit_offset
= state
->bit_offset
% 8;
263 state
->bit_buf
|= (ocode
<< bit_offset
) & 0xff;
264 output_byte(f
, state
->bit_buf
);
266 bits
= state
->code_len
- (8 - bit_offset
);
267 ocode
>>= 8 - bit_offset
;
268 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
270 output_byte(f
, ocode
& 0xff);
275 state
->bit_offset
+= state
->code_len
;
276 state
->bit_buf
= ocode
& rmask
[bits
];
277 if (state
->bit_offset
== state
->code_len
* 8)
278 state
->bit_offset
= 0;
281 * If the next entry is going to be too big for the ocode size,
282 * then increase it, if possible.
284 if (clear_flg
|| state
->first_free
> state
->cur_maxcode
) {
286 * Write the whole buffer, because the input side won't
287 * discover the size increase until after it has read it.
289 if (state
->bit_offset
> 0) {
290 while (state
->bit_offset
< state
->code_len
* 8) {
291 ret
= output_byte(f
, state
->bit_buf
);
292 if (ret
!= ARCHIVE_OK
)
294 state
->bit_offset
+= 8;
299 state
->bit_offset
= 0;
303 state
->cur_maxcode
= MAXCODE(state
->code_len
);
306 if (state
->code_len
== 16)
307 state
->cur_maxcode
= state
->max_maxcode
;
309 state
->cur_maxcode
= MAXCODE(state
->code_len
);
317 output_flush(struct archive_write_filter
*f
)
319 struct private_data
*state
= f
->data
;
322 /* At EOF, write the rest of the buffer. */
323 if (state
->bit_offset
% 8) {
324 state
->code_len
= (state
->bit_offset
% 8 + 7) / 8;
325 ret
= output_byte(f
, state
->bit_buf
);
326 if (ret
!= ARCHIVE_OK
)
334 * Write data to the compressed stream.
337 archive_compressor_compress_write(struct archive_write_filter
*f
,
338 const void *buff
, size_t length
)
340 struct private_data
*state
= (struct private_data
*)f
->data
;
344 const unsigned char *bp
;
351 if (state
->in_count
== 0) {
352 state
->cur_code
= *bp
++;
360 state
->cur_fcode
= (c
<< 16) + state
->cur_code
;
361 i
= ((c
<< HSHIFT
) ^ state
->cur_code
); /* Xor hashing. */
363 if (state
->hashtab
[i
] == state
->cur_fcode
) {
364 state
->cur_code
= state
->codetab
[i
];
367 if (state
->hashtab
[i
] < 0) /* Empty slot. */
369 /* Secondary hash (after G. Knott). */
378 if (state
->hashtab
[i
] == state
->cur_fcode
) {
379 state
->cur_code
= state
->codetab
[i
];
382 if (state
->hashtab
[i
] >= 0)
385 ret
= output_code(f
, state
->cur_code
);
386 if (ret
!= ARCHIVE_OK
)
389 if (state
->first_free
< state
->max_maxcode
) {
390 state
->codetab
[i
] = state
->first_free
++; /* code -> hashtable */
391 state
->hashtab
[i
] = state
->cur_fcode
;
394 if (state
->in_count
< state
->checkpoint
)
397 state
->checkpoint
= state
->in_count
+ CHECK_GAP
;
399 if (state
->in_count
<= 0x007fffff && state
->out_count
!= 0)
400 ratio
= (int)(state
->in_count
* 256 / state
->out_count
);
401 else if ((ratio
= (int)(state
->out_count
/ 256)) == 0)
404 ratio
= (int)(state
->in_count
/ ratio
);
406 if (ratio
> state
->compress_ratio
)
407 state
->compress_ratio
= ratio
;
409 state
->compress_ratio
= 0;
410 memset(state
->hashtab
, 0xff, sizeof(state
->hashtab
));
411 state
->first_free
= FIRST
;
412 ret
= output_code(f
, CLEAR
);
413 if (ret
!= ARCHIVE_OK
)
423 * Finish the compression...
426 archive_compressor_compress_close(struct archive_write_filter
*f
)
428 struct private_data
*state
= (struct private_data
*)f
->data
;
431 ret
= output_code(f
, state
->cur_code
);
432 if (ret
!= ARCHIVE_OK
)
434 ret
= output_flush(f
);
435 if (ret
!= ARCHIVE_OK
)
438 /* Write the last block */
439 ret
= __archive_write_filter(f
->next_filter
,
440 state
->compressed
, state
->compressed_offset
);
442 ret2
= __archive_write_close_filter(f
->next_filter
);
445 free(state
->compressed
);
451 archive_compressor_compress_free(struct archive_write_filter
*f
)
453 (void)f
; /* UNUSED */