Merge pull request #12 from davel/davel/sqsh
[debian-nspark.git] / compress.c
blobfd1357ff558f16a355a694da556e96febaaf6f0d
2 /*
3 * compress/uncompress archive
5 * Authors: Spencer W. Thomas (decvax!utah-cs!thomas)
6 * Jim McKie (decvax!mcvax!jim)
7 * Steve Davies (decvax!vax135!petsd!peora!srd)
8 * Ken Turkowski (decvax!decwrl!turtlevax!ken)
9 * James A. Woods (decvax!ihnp4!ames!jaw)
10 * Joe Orost (decvax!vax135!petsd!joe)
12 * NOTE: these functions also support "squash" (which is just a
13 * 13-bit compress), and "crunch" (which is a 12-bit compress
14 * with additional run-length encoding). AJD
16 * $Header: compress.c 1.11 95/08/01 $
17 * $Log: compress.c,v $
18 * Revision 1.11 95/08/01 xx:xx:xx BB
19 * Quite a few changes for Borland C/C++
20 * Made htab and codetab arrays dynamic.
21 * (Compile with -DBB_HUGE_STATIC_ARRAYS if you DO want these
22 * huge static arrays in your executable.)
23 * Changed pointers to normalized or huge pointers because
24 * arrays span more than 64k.
25 * Changed a few types from int to long because 32bits integers
26 * are needed.
28 * Revision 1.10 95/01/25 12:49:43 arb
29 * Bug fixes caused by 1.9
31 * Revision 1.9 95/01/06 12:00:06 arb
32 * Fixes for Alpha.
34 * Revision 1.8 94/02/28 23:57:55 arb
35 * Fixed number of compression bits for ArcFS format archives
37 * Revision 1.7 93/08/20 11:35:20 arb
38 * Prevent printing of "uncompressed" etc. if quiet flag is set
40 * Revision 1.6 92/12/07 17:17:28 duplain
41 * reformatted source.
43 * Revision 1.5 92/11/09 14:48:00 duplain
44 * Initialised offset and size from getcode() each time uncompress() called.
46 * Revision 1.4 92/11/02 11:43:14 duplain
47 * Correct comment about crunch/squash in header.
49 * Revision 1.3 92/10/23 14:08:13 duplain
50 * Minor changes to printf's at end of uncompress.
52 * Revision 1.2 92/10/01 11:20:19 duplain
53 * Added check for EOF.
55 * Revision 1.1 92/09/29 18:02:14 duplain
56 * Initial revision
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include "spark.h"
63 #include "pack.h"
64 #include "main.h"
65 #include "crc.h"
66 #include "garble.h"
67 #include "error.h"
69 /* BB changed next line because of conflict with Borland's io.h */
71 /* #include "io.h" */
72 #include "nsparkio.h"
73 #include "arcfs.h"
75 #if defined(__MSDOS__) && !defined(MSDOS32)
76 #ifdef __WATCOMC__
77 #include <malloc.h>
78 #define farcalloc halloc
79 #else
80 #include <alloc.h> /* for farcalloc() */
81 #endif
82 #else
83 #define farcalloc calloc
84 #endif /* __MSDOS__ */
86 #define PBITS 16
87 #define CRUNCHBITS 12
88 #define SQUASHBITS 13
89 #define COMPRESSBITS 16
91 /* BB changed constant in next line to long: 16bits 65536 == 0 ! */
92 #define HSIZE 65536L
93 #define INIT_BITS 9 /* initial number of bits/code */
95 /* BB changed next macros.
96 * Arrays htab and codetab both exceed 64k. To prevent wraparound
97 at the 64k boundary, normalized or huge pointers have to be used.
98 Since subscripts are 16 bit ints under the Borland compiler,
99 subscripts have to be made explicitely long.
100 And finally COMPRESSBITS == 16, but 1 << 16 == 0 for 16 bits
101 integers! */
103 /* #define MAXCODE(n_bits) ((1 << (n_bits)) - 1) */
105 /* #define htabof(i) htab[i] */
107 /* #define codetabof(i) codetab[i] */
109 /* #define tab_prefixof(i) codetabof(i) */
111 /* #define tab_suffixof(i) ((char_type *)(htab))[i] */
113 /* #define de_stack ((char_type *)&tab_suffixof(1<<COMPRESSBITS)) */
114 #if defined(__MSDOS__) && !defined(MSDOS32)
115 #define MAXCODE(n_bits) ((long)(1L << (n_bits)) - 1L)
116 #define htabof(i) htab[(long)(i)]
117 #define codetabof(i) codetab[(long)(i)]
118 #define tab_prefixof(i) codetabof(i)
119 #define tab_suffixof(i) ((char_type huge *)(htab))[(long)(i)]
120 #define de_stack \
121 ((char_type huge *)&tab_suffixof(1L<<COMPRESSBITS))
122 #else
123 #define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
124 #define htabof(i) htab[i]
125 #define codetabof(i) codetab[i]
126 #define tab_prefixof(i) codetabof(i)
127 #define tab_suffixof(i) ((char_type *)(htab))[i]
128 #define de_stack ((char_type *)&tab_suffixof(1<<COMPRESSBITS))
129 #endif /* __MSDOS__ */
130 #define FIRST 257 /* first free entry */
131 #define CLEAR 256 /* table clear output code */
133 /* BB changed next two lines. For 16 bits, the maximum code_int
134 becomes zero again! (1 << 16 == 0).
135 Debugging the un*x version shows that
136 count_int should be a 32bits integer! */
138 /* typedef int code_int; */
140 /* typedef int count_int; */
141 #if defined(__MSDOS__) && !defined(MSDOS32)
142 #define NSHUGE huge
143 typedef long code_int;
144 typedef long count_int;
145 #else
146 #define NSHUGE
147 typedef int count_int;
148 typedef int code_int;
149 #endif /* __MSDOS__ */
150 typedef unsigned char char_type;
152 static int n_bits; /* number of bits/code */
153 static int maxbits; /* user settable max # bits/code */
154 static code_int maxcode; /* maximum code, given n_bits */
155 static code_int maxmaxcode; /* should NEVER generate this code */
157 /* BB changed next two lines.
158 Under Borland C/C++ static arrays are REALLY static, i.e. they
159 clog the executable with some 384k of `empty space'. So use
160 dynamic arrays instead. */
162 /* static count_int htab[HSIZE]; */
164 /* static unsigned short codetab[HSIZE]; */
165 #if !defined(__MSDOS__) || defined(MSDOS32)
166 #define BB_HUGE_STATIC_ARRAYS
167 #else
168 /* For those that do want to use static arrays:
169 define BB_HUGE_STATIC_ARRAYS. */
170 #endif
172 #ifdef BB_HUGE_STATIC_ARRAYS
173 static count_int NSHUGE htab[HSIZE];
174 static unsigned short NSHUGE codetab[HSIZE];
175 #else /* BB_HUGE_STATIC_ARRAYS */
176 static count_int NSHUGE *htab = NULL;
177 static unsigned short NSHUGE *codetab = NULL;
178 #endif /* BB_HUGE_STATIC_ARRAYS */
180 static char_type rmask[9] =
181 { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
182 static code_int free_ent; /* first unused entry */
183 static int clear_flg;
184 static long readsize; /* number of bytes left to read */
186 static code_int offset; /* from getcode() */
187 static size_t size;
189 static code_int getcode(FILE * ifp);
192 Status
193 uncompress(Header *header, FILE *ifp, FILE *ofp, CompType type)
195 /* BB changed next line. stackp points to huge pointers. */
196 register char_type NSHUGE *stackp;
197 register code_int finchar;
198 register code_int code, oldcode, incode;
199 char *message;
201 init_garble();
203 #if !defined(BB_HUGE_STATIC_ARRAYS)
204 if (!htab)
205 htab = (count_int NSHUGE *) farcalloc(HSIZE, sizeof(count_int));
206 if (!codetab)
207 codetab =
208 (unsigned short NSHUGE *) farcalloc(HSIZE,
209 sizeof(unsigned short));
210 if (!htab || !codetab)
212 error("%s: uncompress: out of memory", ourname);
213 exit(1);
215 #endif /* ! BB_HUGE_STATIC_ARRAYS */
217 crc = 0;
218 clear_flg = 0;
219 offset = 0;
220 size = 0;
221 readsize = header->complen;
223 if (type == SQUASH)
224 maxbits = SQUASHBITS;
225 else if (type == UNIX_COMPRESS)
227 /* Read the unix compress header */
228 read_byte(ifp);
229 read_byte(ifp);
230 maxbits = read_byte(ifp) & 0x1f;
231 readsize -= 3;
233 else
235 if (arcfs)
237 maxbits = arcfs_maxbits;
238 ungarble('\0'); // Need to consume one byte of password.
240 else
242 maxbits = read_byte(ifp);
243 readsize--;
246 maxmaxcode = MAXCODE(maxbits);
249 * As above, initialize the first 256 entries in the table.
251 maxcode = MAXCODE(n_bits = INIT_BITS);
252 for (code = 255; code >= 0; code--)
254 tab_prefixof(code) = 0;
255 tab_suffixof(code) = (char_type) code;
257 free_ent = FIRST;
259 finchar = oldcode = getcode(ifp);
260 if (oldcode == -1) /* EOF already? */
261 goto compress_exit; /* Get out of here */
263 /* first code must be 8 bits = char */
264 if (type == CRUNCH)
266 putc_init();
267 /* BB changed next line for Borland C/C++ 4 */
268 putc_ncr(ofp, (Byte) finchar);
270 else
272 /* BB changed next three lines for Borland C/C++ 4 */
273 if (!testing)
274 write_byte(ofp, (Byte) finchar);
275 calccrc((Byte) finchar);
278 stackp = de_stack;
280 while ((code = getcode(ifp)) != -1)
282 if (check_stream(ifp) != FNOERR)
283 break;
284 if (code == CLEAR)
286 for (code = 255; code >= 0; code--)
287 tab_prefixof(code) = 0;
288 clear_flg = 1;
289 free_ent = FIRST - 1;
290 if ((code = getcode(ifp)) == -1) /* O, untimely death! */
291 break;
293 incode = code;
295 * Special case for KwKwK string.
297 if (code >= free_ent)
299 /* BB changed next line for Borland C/C++ 4 */
300 *stackp++ = (char_type) finchar;
301 code = oldcode;
304 * Generate output characters in reverse order
307 while (code >= 256)
309 if ((char NSHUGE *)(stackp+1) > (char NSHUGE *)(&htab[0] + HSIZE))
311 error("%s: uncompress: corrupt or garbled archive file", ourname);
312 exit(1);
314 *stackp++ = tab_suffixof(code);
315 code = tab_prefixof(code);
317 if ((char NSHUGE *)(stackp+1) > (char NSHUGE *)(&htab[0] + HSIZE))
319 error("%s: uncompress: corrupt or garbled archive file", ourname);
320 exit(1);
322 /* BB changed next line for Borland C/C++ 4 */
323 /* *stackp++ = finchar = tab_suffixof(code); */
324 finchar = tab_suffixof(code);
325 *stackp++ = (char_type) finchar;
328 * And put them out in forward order
330 while (stackp > de_stack)
332 stackp--;
333 if (type == CRUNCH)
334 putc_ncr(ofp, *stackp);
335 else
337 if (!testing)
338 write_byte(ofp, *stackp);
339 calccrc(*stackp);
344 * Generate the new entry.
346 if ((code = free_ent) < maxmaxcode)
348 /* BB changed next two lines for Borland C/C++ 4 */
349 tab_prefixof(code) = (unsigned short) oldcode;
350 tab_suffixof(code) = (char_type) finchar;
351 free_ent = code + 1;
354 * Remember previous code.
356 oldcode = incode;
358 compress_exit:
359 if (check_stream(ifp) == FRWERR)
360 return (RERR);
361 if (!testing && check_stream(ofp) == FRWERR)
362 return (WERR);
363 if ((Halfword) crc != header->crc)
364 return (CRCERR);
365 if (testing)
366 switch (type)
368 case COMPRESS:
369 case UNIX_COMPRESS:
370 message = "OK (compressed)";
371 break;
372 case CRUNCH:
373 message = "OK (crunched)";
374 break;
375 case SQUASH:
376 message = "OK (squashed)";
377 break;
378 default:
379 message = "internal error";
380 break;
382 else
383 switch (type)
385 case COMPRESS:
386 case UNIX_COMPRESS:
387 message = "uncompressed";
388 break;
389 case CRUNCH:
390 message = "uncrunched";
391 break;
392 case SQUASH:
393 message = "unsquashed";
394 break;
395 default:
396 message = "internal error";
397 break;
399 if (!quiet)
400 msg("%s", message);
401 return (NOERR);
405 * Read one code from the input. If EOF, return -1.
407 static code_int
408 getcode(FILE *ifp)
410 register code_int code;
411 static char_type buf[COMPRESSBITS];
412 register int r_off, bits;
413 size_t i;
414 /* BB changed next line. We are doing pointer-artithmatics
415 and that can be dangerous if other than normalized (huge)
416 pointers are being used. */
417 register char_type NSHUGE *bp = buf;
419 if (clear_flg > 0 || offset >= size || free_ent > maxcode)
422 * If the next entry will be too big for the current code
423 * size, then we must increase the size. This implies
424 * reading a new buffer full, too.
426 if (free_ent > maxcode)
428 n_bits++;
429 maxcode = n_bits == maxbits ? maxmaxcode : MAXCODE(n_bits);
431 if (clear_flg > 0)
433 maxcode = MAXCODE(n_bits = INIT_BITS);
434 clear_flg = 0;
436 if (readsize == 0)
437 return (-1);
438 /* BB added cast to next line */
439 size = readsize < n_bits ? (size_t) readsize : n_bits;
440 size = fread(buf, 1, size, ifp);
441 if (size == 0)
442 return (-1); /* end of file */
443 for (i = 0; i < size; i++)
445 buf[i] = ungarble(buf[i]);
447 readsize -= size;
448 offset = 0;
449 /* Round size down to integral number of codes */
450 size = (size << 3) - (n_bits - 1);
452 r_off = (int)offset;
453 bits = n_bits;
456 * Get to the first byte.
458 bp += (r_off >> 3);
459 r_off &= 7;
460 /* Get first part (low order bits) */
462 code = (*bp++ >> r_off);
463 bits -= (8 - r_off);
464 r_off = 8 - r_off; /* now, offset into code word */
465 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
466 if (bits >= 8)
468 code |= *bp++ << r_off;
469 r_off += 8;
470 bits -= 8;
472 /* high order bits. */
473 code |= (*bp & rmask[bits]) << r_off;
474 offset += n_bits;
476 return (code);