Initial commit.
[CMakeLuaTailorHgBridge.git] / CMakeLua / Utilities / cmcompress / cmcompress.c
blob59978f679ebb9fd8ad88850304e955dc912f39be
1 /*
2 * Copyright (c) 1985, 1986 The Regents of the University of California.
3 * All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * James A. Woods, derived from original work by Spencer Thomas
7 * and Joseph Orost.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
38 #include "cmcompress.h"
40 #include <errno.h>
41 #include <string.h>
43 static const char_type magic_header[] = { "\037\235" }; /* 1F 9D */
45 /* Defines for third byte of header */
46 #define BIT_MASK 0x1f
47 #define BLOCK_MASK 0x80
48 #define CHECK_GAP 10000 /* ratio check interval */
49 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
50 a fourth header byte (for expansion).
52 #define INIT_BITS 9 /* initial number of bits/code */
54 #ifdef COMPATIBLE /* But wrong! */
55 # define MAXCODE(n_bits) (1 << (n_bits) - 1)
56 #else
57 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
58 #endif /* COMPATIBLE */
60 #define htabof(i) cdata->htab[i]
61 #define codetabof(i) cdata->codetab[i]
64 * the next two codes should not be changed lightly, as they must not
65 * lie within the contiguous general code space.
67 #define FIRST 257 /* first free entry */
68 #define CLEAR 256 /* table clear output code */
70 #ifdef DEBUG
71 static void prratio( FILE *stream, long int num, long int den);
72 #endif
74 int cmcompress_compress_initialize(struct cmcompress_stream* cdata)
76 cdata->maxbits = BITS; /* user settable max # bits/code */
77 cdata->maxmaxcode = 1 << BITS; /* should NEVER generate this code */
78 cdata->hsize = HSIZE; /* for dynamic table sizing */
79 cdata->free_ent = 0; /* first unused entry */
80 cdata->nomagic = 0; /* Use a 3-byte magic number header, unless old file */
81 cdata->block_compress = BLOCK_MASK;
82 cdata->clear_flg = 0;
83 cdata->ratio = 0;
84 cdata->checkpoint = CHECK_GAP;
86 cdata->input_stream = 0;
87 cdata->output_stream = 0;
88 cdata->client_data = 0;
89 return 1;
92 static void cl_hash(struct cmcompress_stream* cdata, count_int hsize) /* reset code table */
94 register count_int *htab_p = cdata->htab+hsize;
95 register long i;
96 register long m1 = -1;
98 i = hsize - 16;
100 { /* might use Sys V memset(3) here */
101 *(htab_p-16) = m1;
102 *(htab_p-15) = m1;
103 *(htab_p-14) = m1;
104 *(htab_p-13) = m1;
105 *(htab_p-12) = m1;
106 *(htab_p-11) = m1;
107 *(htab_p-10) = m1;
108 *(htab_p-9) = m1;
109 *(htab_p-8) = m1;
110 *(htab_p-7) = m1;
111 *(htab_p-6) = m1;
112 *(htab_p-5) = m1;
113 *(htab_p-4) = m1;
114 *(htab_p-3) = m1;
115 *(htab_p-2) = m1;
116 *(htab_p-1) = m1;
117 htab_p -= 16;
119 while ((i -= 16) >= 0);
120 for ( i += 16; i > 0; i-- )
122 *--htab_p = m1;
127 * Output the given code.
128 * Inputs:
129 * code: A n_bits-bit integer. If == -1, then EOF. This assumes
130 * that n_bits =< (long)wordsize - 1.
131 * Outputs:
132 * Outputs code to the file.
133 * Assumptions:
134 * Chars are 8 bits long.
135 * Algorithm:
136 * Maintain a BITS character long buffer (so that 8 codes will
137 * fit in it exactly). Use the VAX insv instruction to insert each
138 * code in turn. When the buffer fills up empty it and start over.
141 static char buf[BITS];
143 #ifndef vax
144 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
145 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
146 #endif /* vax */
148 static int output(struct cmcompress_stream* cdata, code_int code)
150 #ifdef DEBUG
151 static int col = 0;
152 #endif /* DEBUG */
155 * On the VAX, it is important to have the register declarations
156 * in exactly the order given, or the asm will break.
158 register int r_off = cdata->offset, bits= cdata->n_bits;
159 register char * bp = buf;
161 #ifdef DEBUG
162 if ( verbose )
164 fprintf( stderr, "%5d%c", code,
165 (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
167 #endif /* DEBUG */
168 if ( code >= 0 )
170 #if defined(vax) && !defined(__GNUC__)
172 * VAX and PCC DEPENDENT!! Implementation on other machines is
173 * below.
175 * Translation: Insert BITS bits from the argument starting at
176 * cdata->offset bits from the beginning of buf.
178 0; /* Work around for pcc -O bug with asm and if stmt */
179 asm( "insv 4(ap),r11,r10,(r9)" );
180 #else
182 * byte/bit numbering on the VAX is simulated by the following code
185 * Get to the first byte.
187 bp += (r_off >> 3);
188 r_off &= 7;
190 * Since code is always >= 8 bits, only need to mask the first
191 * hunk on the left.
193 *bp = (char)((*bp & rmask[r_off]) | ((code << r_off) & lmask[r_off]));
194 bp++;
195 bits -= (8 - r_off);
196 code >>= 8 - r_off;
197 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
198 if ( bits >= 8 )
200 *bp++ = (char)(code);
201 code >>= 8;
202 bits -= 8;
204 /* Last bits. */
205 if(bits)
207 *bp = (char)(code);
209 #endif /* vax */
210 cdata->offset += cdata->n_bits;
211 if ( cdata->offset == (cdata->n_bits << 3) )
213 bp = buf;
214 bits = cdata->n_bits;
215 cdata->bytes_out += bits;
218 if ( cdata->output_stream(cdata, bp, 1) != 1 )
220 return 0;
222 bp++;
224 while(--bits);
225 cdata->offset = 0;
229 * If the next entry is going to be too big for the code size,
230 * then increase it, if possible.
232 if ( cdata->free_ent > cdata->maxcode || (cdata->clear_flg > 0))
235 * Write the whole buffer, because the input side won't
236 * discover the size increase until after it has read it.
238 if ( cdata->offset > 0 )
240 if ( cdata->output_stream(cdata, buf, cdata->n_bits) != cdata->n_bits )
242 return 0;
244 cdata->bytes_out += cdata->n_bits;
246 cdata->offset = 0;
248 if ( cdata->clear_flg )
250 cdata->maxcode = MAXCODE (cdata->n_bits = INIT_BITS);
251 cdata->clear_flg = 0;
253 else
255 cdata->n_bits++;
256 if ( cdata->n_bits == cdata->maxbits )
258 cdata->maxcode = cdata->maxmaxcode;
260 else
262 cdata->maxcode = MAXCODE(cdata->n_bits);
265 #ifdef DEBUG
266 if ( debug )
268 fprintf( stderr, "\nChange to %d bits\n", cdata->n_bits );
269 col = 0;
271 #endif /* DEBUG */
274 else
277 * At EOF, write the rest of the buffer.
279 if ( cdata->offset > 0 )
281 cdata->offset = (cdata->offset + 7) / 8;
282 if ( cdata->output_stream(cdata, buf, cdata->offset ) != cdata->offset )
284 return 0;
286 cdata->bytes_out += cdata->offset;
288 cdata->offset = 0;
289 (void)fflush( stdout );
290 if( ferror( stdout ) )
292 return 0;
294 #ifdef DEBUG
295 if ( verbose )
297 fprintf( stderr, "\n" );
299 #endif
301 return 1;
/*
 * compress stdin to stdout
 *
 * Algorithm:  use open addressing double hashing (no chaining) on the
 * prefix code / next character combination.  We do a variant of Knuth's
 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
 * secondary probe.  Here, the modular division first probe gives way
 * to a faster exclusive-or manipulation.  Also do block compression with
 * an adaptive reset, whereby the code table is cleared when the compression
 * ratio decreases, but after the table fills.  The variable-length output
 * codes are re-sized at this point, and a special CLEAR code is generated
 * for the decompressor.  Late addition:  construct the table according to
 * file size for noticeable speed improvement on small files.  Please direct
 * questions about this implementation to ames!jaw.
 */
320 int cmcompress_compress_start(struct cmcompress_stream* cdata)
322 #ifndef COMPATIBLE
323 if (cdata->nomagic == 0)
325 char headLast = (char)(cdata->maxbits | cdata->block_compress);
326 cdata->output_stream(cdata, (const char*)magic_header, 2);
327 cdata->output_stream(cdata, &headLast, 1);
328 if(ferror(stdout))
330 printf("Error...\n");
333 #endif /* COMPATIBLE */
335 cdata->offset = 0;
336 cdata->bytes_out = 3; /* includes 3-byte header mojo */
337 cdata->out_count = 0;
338 cdata->clear_flg = 0;
339 cdata->ratio = 0;
340 cdata->in_count = 1;
341 cdata->checkpoint = CHECK_GAP;
342 cdata->maxcode = MAXCODE(cdata->n_bits = INIT_BITS);
343 cdata->free_ent = ((cdata->block_compress) ? FIRST : 256 );
345 cdata->first_pass = 1;
347 cdata->hshift = 0;
348 for ( cdata->fcode = (long) cdata->hsize; cdata->fcode < 65536L; cdata->fcode *= 2L )
350 cdata->hshift++;
352 cdata->hshift = 8 - cdata->hshift; /* set hash code range bound */
354 cdata->hsize_reg = cdata->hsize;
355 cl_hash(cdata, (count_int) cdata->hsize_reg); /* clear hash table */
357 return 1;
360 static int cl_block (struct cmcompress_stream* cdata) /* table clear for block compress */
362 register long int rat;
364 cdata->checkpoint = cdata->in_count + CHECK_GAP;
365 #ifdef DEBUG
366 if ( cdata->debug )
368 fprintf ( stderr, "count: %ld, ratio: ", cdata->in_count );
369 prratio ( stderr, cdata->in_count, cdata->bytes_out );
370 fprintf ( stderr, "\n");
372 #endif /* DEBUG */
374 if(cdata->in_count > 0x007fffff)
375 { /* shift will overflow */
376 rat = cdata->bytes_out >> 8;
377 if(rat == 0)
378 { /* Don't divide by zero */
379 rat = 0x7fffffff;
381 else
383 rat = cdata->in_count / rat;
386 else
388 rat = (cdata->in_count << 8) / cdata->bytes_out; /* 8 fractional bits */
390 if ( rat > cdata->ratio )
392 cdata->ratio = rat;
394 else
396 cdata->ratio = 0;
397 #ifdef DEBUG
398 if(cdata->verbose)
400 dump_tab(); /* dump string table */
402 #endif
403 cl_hash (cdata, (count_int) cdata->hsize );
404 cdata->free_ent = FIRST;
405 cdata->clear_flg = 1;
406 if ( !output (cdata, (code_int) CLEAR ) )
408 return 0;
410 #ifdef DEBUG
411 if(cdata->debug)
413 fprintf ( stderr, "clear\n" );
415 #endif /* DEBUG */
417 return 1;
421 int cmcompress_compress(struct cmcompress_stream* cdata, void* buff, size_t n)
423 register code_int i;
424 register int c;
425 register int disp;
427 unsigned char* input_buffer = (unsigned char*)buff;
429 size_t cc;
431 /*printf("cmcompress_compress(%p, %p, %d)\n", cdata, buff, n);*/
433 if ( cdata->first_pass )
435 cdata->ent = input_buffer[0];
436 ++ input_buffer;
437 -- n;
438 cdata->first_pass = 0;
441 for ( cc = 0; cc < n; ++ cc )
443 c = input_buffer[cc];
444 cdata->in_count++;
445 cdata->fcode = (long) (((long) c << cdata->maxbits) + cdata->ent);
446 i = ((c << cdata->hshift) ^ cdata->ent); /* xor hashing */
448 if ( htabof (i) == cdata->fcode )
450 cdata->ent = codetabof (i);
451 continue;
453 else if ( (long)htabof (i) < 0 ) /* empty slot */
455 goto nomatch;
457 disp = cdata->hsize_reg - i; /* secondary hash (after G. Knott) */
458 if ( i == 0 )
460 disp = 1;
462 probe:
463 if ( (i -= disp) < 0 )
465 i += cdata->hsize_reg;
468 if ( htabof (i) == cdata->fcode )
470 cdata->ent = codetabof (i);
471 continue;
473 if ( (long)htabof (i) > 0 )
475 goto probe;
477 nomatch:
478 if ( !output(cdata, (code_int) cdata->ent ) )
480 return 0;
482 cdata->out_count++;
483 cdata->ent = c;
484 if (
485 #ifdef SIGNED_COMPARE_SLOW
486 (unsigned) cdata->free_ent < (unsigned) cdata->maxmaxcode
487 #else
488 cdata->free_ent < cdata->maxmaxcode
489 #endif
492 codetabof (i) = (unsigned short)(cdata->free_ent++); /* code -> hashtable */
493 htabof (i) = cdata->fcode;
495 else if ( (count_int)cdata->in_count >= cdata->checkpoint && cdata->block_compress )
497 if ( !cl_block (cdata) )
499 return 0;
504 return 1;
507 int cmcompress_compress_finalize(struct cmcompress_stream* cdata)
510 * Put out the final code.
512 if ( !output(cdata, (code_int)cdata->ent ) )
514 return 0;
516 cdata->out_count++;
517 if ( !output(cdata, (code_int)-1 ) )
519 return 0;
522 if(cdata->bytes_out > cdata->in_count) /* exit(2) if no savings */
524 return 0;
526 return 1;
530 #if defined(DEBUG)
/*
 * Print num/den to stream as a percentage with two decimals, e.g.
 * "50.00%"; a negative ratio is prefixed with '-'.
 *
 * The value is computed as an integer number of hundredths of a percent
 * (10000 * num / den), picking a formula that avoids both overflow of the
 * product and division by zero:
 *   - den == 0: the ratio is undefined and is printed as 0.00%;
 *   - num <= 214748: 10000 * num fits in 32 bits, so multiply first;
 *   - |den| >= 10000: scale the denominator down instead (as before);
 *   - otherwise: den / 10000 would be 0, which used to divide by zero,
 *     so combine quotient and remainder; (num % den) * 10000 stays
 *     below 10^8 here.
 */
static void prratio(FILE *stream, long int num, long int den)
{
  long q;

  if(den == 0)
    {
    q = 0;
    }
  else if(num <= 214748L)
    { /* 2147483647/10000 */
    q = 10000L * num / den;
    }
  else if(den >= 10000L || den <= -10000L)
    {
    q = num / (den / 10000L);
    }
  else
    {
    q = (num / den) * 10000L + ((num % den) * 10000L) / den;
    }
  if (q < 0)
    {
    putc('-', stream);
    q = -q;
    }
  fprintf(stream, "%d.%02d%%", (int)(q / 100), (int)(q % 100));
}
550 #endif