2 * Copyright (c) 1985, 1986 The Regents of the University of California.
5 * This code is derived from software contributed to Berkeley by
6 * James A. Woods, derived from original work by Spencer Thomas
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by the University of
20 * California, Berkeley and its contributors.
21 * 4. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 #include "cmcompress.h"
43 static const char_type magic_header
[] = { "\037\235" }; /* 1F 9D */
45 /* Defines for third byte of header */
47 #define BLOCK_MASK 0x80
48 #define CHECK_GAP 10000 /* ratio check interval */
49 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is
50 a fourth header byte (for expansion).
52 #define INIT_BITS 9 /* initial number of bits/code */
54 #ifdef COMPATIBLE /* But wrong! */
55 # define MAXCODE(n_bits) (1 << (n_bits) - 1)
57 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
58 #endif /* COMPATIBLE */
60 #define htabof(i) cdata->htab[i]
61 #define codetabof(i) cdata->codetab[i]
64 * the next two codes should not be changed lightly, as they must not
65 * lie within the contiguous general code space.
67 #define FIRST 257 /* first free entry */
68 #define CLEAR 256 /* table clear output code */
71 static void prratio( FILE *stream
, long int num
, long int den
);
74 int cmcompress_compress_initialize(struct cmcompress_stream
* cdata
)
76 cdata
->maxbits
= BITS
; /* user settable max # bits/code */
77 cdata
->maxmaxcode
= 1 << BITS
; /* should NEVER generate this code */
78 cdata
->hsize
= HSIZE
; /* for dynamic table sizing */
79 cdata
->free_ent
= 0; /* first unused entry */
80 cdata
->nomagic
= 0; /* Use a 3-byte magic number header, unless old file */
81 cdata
->block_compress
= BLOCK_MASK
;
84 cdata
->checkpoint
= CHECK_GAP
;
86 cdata
->input_stream
= 0;
87 cdata
->output_stream
= 0;
88 cdata
->client_data
= 0;
92 static void cl_hash(struct cmcompress_stream
* cdata
, count_int hsize
) /* reset code table */
94 register count_int
*htab_p
= cdata
->htab
+hsize
;
96 register long m1
= -1;
100 { /* might use Sys V memset(3) here */
119 while ((i
-= 16) >= 0);
120 for ( i
+= 16; i
> 0; i
-- )
127 * Output the given code.
129 * code: A n_bits-bit integer. If == -1, then EOF. This assumes
130 * that n_bits =< (long)wordsize - 1.
132 * Outputs code to the file.
134 * Chars are 8 bits long.
136 * Maintain a BITS character long buffer (so that 8 codes will
137 * fit in it exactly). Use the VAX insv instruction to insert each
138 * code in turn. When the buffer fills up empty it and start over.
141 static char buf
[BITS
];
144 char_type lmask
[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
145 char_type rmask
[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
148 static int output(struct cmcompress_stream
* cdata
, code_int code
)
155 * On the VAX, it is important to have the register declarations
156 * in exactly the order given, or the asm will break.
158 register int r_off
= cdata
->offset
, bits
= cdata
->n_bits
;
159 register char * bp
= buf
;
164 fprintf( stderr
, "%5d%c", code
,
165 (col
+=6) >= 74 ? (col
= 0, '\n') : ' ' );
170 #if defined(vax) && !defined(__GNUC__)
172 * VAX and PCC DEPENDENT!! Implementation on other machines is
175 * Translation: Insert BITS bits from the argument starting at
176 * cdata->offset bits from the beginning of buf.
178 0; /* Work around for pcc -O bug with asm and if stmt */
179 asm( "insv 4(ap),r11,r10,(r9)" );
182 * byte/bit numbering on the VAX is simulated by the following code
185 * Get to the first byte.
190 * Since code is always >= 8 bits, only need to mask the first
193 *bp
= (char)((*bp
& rmask
[r_off
]) | ((code
<< r_off
) & lmask
[r_off
]));
197 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
200 *bp
++ = (char)(code
);
210 cdata
->offset
+= cdata
->n_bits
;
211 if ( cdata
->offset
== (cdata
->n_bits
<< 3) )
214 bits
= cdata
->n_bits
;
215 cdata
->bytes_out
+= bits
;
218 if ( cdata
->output_stream(cdata
, bp
, 1) != 1 )
229 * If the next entry is going to be too big for the code size,
230 * then increase it, if possible.
232 if ( cdata
->free_ent
> cdata
->maxcode
|| (cdata
->clear_flg
> 0))
235 * Write the whole buffer, because the input side won't
236 * discover the size increase until after it has read it.
238 if ( cdata
->offset
> 0 )
240 if ( cdata
->output_stream(cdata
, buf
, cdata
->n_bits
) != cdata
->n_bits
)
244 cdata
->bytes_out
+= cdata
->n_bits
;
248 if ( cdata
->clear_flg
)
250 cdata
->maxcode
= MAXCODE (cdata
->n_bits
= INIT_BITS
);
251 cdata
->clear_flg
= 0;
256 if ( cdata
->n_bits
== cdata
->maxbits
)
258 cdata
->maxcode
= cdata
->maxmaxcode
;
262 cdata
->maxcode
= MAXCODE(cdata
->n_bits
);
268 fprintf( stderr
, "\nChange to %d bits\n", cdata
->n_bits
);
277 * At EOF, write the rest of the buffer.
279 if ( cdata
->offset
> 0 )
281 cdata
->offset
= (cdata
->offset
+ 7) / 8;
282 if ( cdata
->output_stream(cdata
, buf
, cdata
->offset
) != cdata
->offset
)
286 cdata
->bytes_out
+= cdata
->offset
;
289 (void)fflush( stdout
);
290 if( ferror( stdout
) )
297 fprintf( stderr
, "\n" );
305 * compress stdin to stdout
307 * Algorithm: use open addressing double hashing (no chaining) on the
308 * prefix code / next character combination. We do a variant of Knuth's
309 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
310 * secondary probe. Here, the modular division first probe is gives way
311 * to a faster exclusive-or manipulation. Also do block compression with
312 * an adaptive reset, whereby the code table is cleared when the compression
313 * ratio decreases, but after the table fills. The variable-length output
314 * codes are re-sized at this point, and a special CLEAR code is generated
315 * for the decompressor. Late addition: construct the table according to
316 * file size for noticeable speed improvement on small files. Please direct
317 * questions about this implementation to ames!jaw.
320 int cmcompress_compress_start(struct cmcompress_stream
* cdata
)
323 if (cdata
->nomagic
== 0)
325 char headLast
= (char)(cdata
->maxbits
| cdata
->block_compress
);
326 cdata
->output_stream(cdata
, (const char*)magic_header
, 2);
327 cdata
->output_stream(cdata
, &headLast
, 1);
330 printf("Error...\n");
333 #endif /* COMPATIBLE */
336 cdata
->bytes_out
= 3; /* includes 3-byte header mojo */
337 cdata
->out_count
= 0;
338 cdata
->clear_flg
= 0;
341 cdata
->checkpoint
= CHECK_GAP
;
342 cdata
->maxcode
= MAXCODE(cdata
->n_bits
= INIT_BITS
);
343 cdata
->free_ent
= ((cdata
->block_compress
) ? FIRST
: 256 );
345 cdata
->first_pass
= 1;
348 for ( cdata
->fcode
= (long) cdata
->hsize
; cdata
->fcode
< 65536L; cdata
->fcode
*= 2L )
352 cdata
->hshift
= 8 - cdata
->hshift
; /* set hash code range bound */
354 cdata
->hsize_reg
= cdata
->hsize
;
355 cl_hash(cdata
, (count_int
) cdata
->hsize_reg
); /* clear hash table */
360 static int cl_block (struct cmcompress_stream
* cdata
) /* table clear for block compress */
362 register long int rat
;
364 cdata
->checkpoint
= cdata
->in_count
+ CHECK_GAP
;
368 fprintf ( stderr
, "count: %ld, ratio: ", cdata
->in_count
);
369 prratio ( stderr
, cdata
->in_count
, cdata
->bytes_out
);
370 fprintf ( stderr
, "\n");
374 if(cdata
->in_count
> 0x007fffff)
375 { /* shift will overflow */
376 rat
= cdata
->bytes_out
>> 8;
378 { /* Don't divide by zero */
383 rat
= cdata
->in_count
/ rat
;
388 rat
= (cdata
->in_count
<< 8) / cdata
->bytes_out
; /* 8 fractional bits */
390 if ( rat
> cdata
->ratio
)
400 dump_tab(); /* dump string table */
403 cl_hash (cdata
, (count_int
) cdata
->hsize
);
404 cdata
->free_ent
= FIRST
;
405 cdata
->clear_flg
= 1;
406 if ( !output (cdata
, (code_int
) CLEAR
) )
413 fprintf ( stderr
, "clear\n" );
421 int cmcompress_compress(struct cmcompress_stream
* cdata
, void* buff
, size_t n
)
427 unsigned char* input_buffer
= (unsigned char*)buff
;
431 /*printf("cmcompress_compress(%p, %p, %d)\n", cdata, buff, n);*/
433 if ( cdata
->first_pass
)
435 cdata
->ent
= input_buffer
[0];
438 cdata
->first_pass
= 0;
441 for ( cc
= 0; cc
< n
; ++ cc
)
443 c
= input_buffer
[cc
];
445 cdata
->fcode
= (long) (((long) c
<< cdata
->maxbits
) + cdata
->ent
);
446 i
= ((c
<< cdata
->hshift
) ^ cdata
->ent
); /* xor hashing */
448 if ( htabof (i
) == cdata
->fcode
)
450 cdata
->ent
= codetabof (i
);
453 else if ( (long)htabof (i
) < 0 ) /* empty slot */
457 disp
= cdata
->hsize_reg
- i
; /* secondary hash (after G. Knott) */
463 if ( (i
-= disp
) < 0 )
465 i
+= cdata
->hsize_reg
;
468 if ( htabof (i
) == cdata
->fcode
)
470 cdata
->ent
= codetabof (i
);
473 if ( (long)htabof (i
) > 0 )
478 if ( !output(cdata
, (code_int
) cdata
->ent
) )
485 #ifdef SIGNED_COMPARE_SLOW
486 (unsigned) cdata
->free_ent
< (unsigned) cdata
->maxmaxcode
488 cdata
->free_ent
< cdata
->maxmaxcode
492 codetabof (i
) = (unsigned short)(cdata
->free_ent
++); /* code -> hashtable */
493 htabof (i
) = cdata
->fcode
;
495 else if ( (count_int
)cdata
->in_count
>= cdata
->checkpoint
&& cdata
->block_compress
)
497 if ( !cl_block (cdata
) )
507 int cmcompress_compress_finalize(struct cmcompress_stream
* cdata
)
510 * Put out the final code.
512 if ( !output(cdata
, (code_int
)cdata
->ent
) )
517 if ( !output(cdata
, (code_int
)-1 ) )
522 if(cdata
->bytes_out
> cdata
->in_count
) /* exit(2) if no savings */
531 static void prratio(FILE *stream
, long int num
, long int den
)
533 register int q
; /* Doesn't need to be long */
536 { /* 2147483647/10000 */
537 q
= num
/ (den
/ 10000L);
541 q
= 10000L * num
/ den
; /* Long calculations, though */
548 fprintf(stream
, "%d.%02d%%", q
/ 100, q
% 100);