Add README
[xapian-trec.git] / unlzw.c
blobfe95b88faebefee7125df9e3c5d9bbffa91cedcd
1 /* unlzw.c -- decompress files in LZW format.
2 * The code in this file is directly derived from the public domain 'compress'
3 * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies,
4 * Ken Turkowski, Dave Mack and Peter Jannesen.
6 * This is a temporary version which will be rewritten in some future version
7 * to accommodate in-memory decompression.
8 */
10 #ifdef RCSID
11 static char rcsid[] = "$Id: unlzw.c,v 0.15 1993/06/10 13:28:35 jloup Exp $";
12 #endif
14 #include <sys/types.h>
16 #include "tailor.h"
18 #ifdef HAVE_UNISTD_H
19 # include <unistd.h>
20 #endif
21 #ifndef NO_FCNTL_H
22 # include <fcntl.h>
23 #endif
25 #include "gzip.h"
26 #include "lzw.h"
28 typedef unsigned char char_type;
29 typedef long code_int;
30 typedef unsigned long count_int;
31 typedef unsigned short count_short;
32 typedef unsigned long cmp_code_int;
34 #define MAXCODE(n) (1L << (n))
36 #ifndef REGISTERS
37 # define REGISTERS 2
38 #endif
39 #define REG1
40 #define REG2
41 #define REG3
42 #define REG4
43 #define REG5
44 #define REG6
45 #define REG7
46 #define REG8
47 #define REG9
48 #define REG10
49 #define REG11
50 #define REG12
51 #define REG13
52 #define REG14
53 #define REG15
54 #define REG16
55 #if REGISTERS >= 1
56 # undef REG1
57 # define REG1 register
58 #endif
59 #if REGISTERS >= 2
60 # undef REG2
61 # define REG2 register
62 #endif
63 #if REGISTERS >= 3
64 # undef REG3
65 # define REG3 register
66 #endif
67 #if REGISTERS >= 4
68 # undef REG4
69 # define REG4 register
70 #endif
71 #if REGISTERS >= 5
72 # undef REG5
73 # define REG5 register
74 #endif
75 #if REGISTERS >= 6
76 # undef REG6
77 # define REG6 register
78 #endif
79 #if REGISTERS >= 7
80 # undef REG7
81 # define REG7 register
82 #endif
83 #if REGISTERS >= 8
84 # undef REG8
85 # define REG8 register
86 #endif
87 #if REGISTERS >= 9
88 # undef REG9
89 # define REG9 register
90 #endif
91 #if REGISTERS >= 10
92 # undef REG10
93 # define REG10 register
94 #endif
95 #if REGISTERS >= 11
96 # undef REG11
97 # define REG11 register
98 #endif
99 #if REGISTERS >= 12
100 # undef REG12
101 # define REG12 register
102 #endif
103 #if REGISTERS >= 13
104 # undef REG13
105 # define REG13 register
106 #endif
107 #if REGISTERS >= 14
108 # undef REG14
109 # define REG14 register
110 #endif
111 #if REGISTERS >= 15
112 # undef REG15
113 # define REG15 register
114 #endif
115 #if REGISTERS >= 16
116 # undef REG16
117 # define REG16 register
118 #endif
120 #ifndef BYTEORDER
121 # define BYTEORDER 0000
122 #endif
124 #ifndef NOALLIGN
125 # define NOALLIGN 0
126 #endif
129 union bytes {
130 long word;
131 struct {
132 #if BYTEORDER == 4321
133 char_type b1;
134 char_type b2;
135 char_type b3;
136 char_type b4;
137 #else
138 #if BYTEORDER == 1234
139 char_type b4;
140 char_type b3;
141 char_type b2;
142 char_type b1;
143 #else
144 # undef BYTEORDER
145 int dummy;
146 #endif
147 #endif
148 } bytes;
151 #if BYTEORDER == 4321 && NOALLIGN == 1
152 # define input(b,o,c,n,m){ \
153 (c) = (*(long *)(&(b)[(o)>>3])>>((o)&0x7))&(m); \
154 (o) += (n); \
156 #else
157 # define input(b,o,c,n,m){ \
158 REG1 char_type *p = &(b)[(o)>>3]; \
159 (c) = ((((long)(p[0]))|((long)(p[1])<<8)| \
160 ((long)(p[2])<<16))>>((o)&0x7))&(m); \
161 (o) += (n); \
163 #endif
165 #ifndef MAXSEG_64K
166 /* DECLARE(ush, tab_prefix, (1<<BITS)); -- prefix code */
167 # define tab_prefixof(i) tab_prefix[i]
168 # define clear_tab_prefixof() memzero(tab_prefix, 256);
169 #else
170 /* DECLARE(ush, tab_prefix0, (1<<(BITS-1)); -- prefix for even codes */
171 /* DECLARE(ush, tab_prefix1, (1<<(BITS-1)); -- prefix for odd codes */
172 ush *tab_prefix[2];
173 # define tab_prefixof(i) tab_prefix[(i)&1][(i)>>1]
174 # define clear_tab_prefixof() \
175 memzero(tab_prefix0, 128), \
176 memzero(tab_prefix1, 128);
177 #endif
178 #define de_stack ((char_type *)(&d_buf[DIST_BUFSIZE-1]))
179 #define tab_suffixof(i) tab_suffix[i]
181 int block_mode = BLOCK_MODE; /* block compress mode -C compatible with 2.0 */
183 /* ============================================================================
184 * Decompress in to out. This routine adapts to the codes in the
185 * file building the "string" table on-the-fly; requiring no table to
186 * be stored in the compressed file.
187 * IN assertions: the buffer inbuf contains already the beginning of
188 * the compressed data, from offsets iptr to insize-1 included.
189 * The magic header has already been checked and skipped.
190 * bytes_in and bytes_out have been initialized.
192 int unlzw(in, out)
193 int in, out; /* input and output file descriptors */
195 REG2 char_type *stackp;
196 REG3 code_int code;
197 REG4 int finchar;
198 REG5 code_int oldcode;
199 REG6 code_int incode;
200 REG7 long inbits;
201 REG8 long posbits;
202 REG9 int outpos;
203 /* REG10 int insize; (global) */
204 REG11 unsigned bitmask;
205 REG12 code_int free_ent;
206 REG13 code_int maxcode;
207 REG14 code_int maxmaxcode;
208 REG15 int n_bits;
209 REG16 int rsize;
211 maxbits = get_byte();
212 block_mode = maxbits & BLOCK_MODE;
213 if ((maxbits & LZW_RESERVED) != 0) {
214 WARN((stderr, "\n%s: %s: warning, unknown flags 0x%x\n",
215 progname, ifname, maxbits & LZW_RESERVED));
217 maxbits &= BIT_MASK;
218 maxmaxcode = MAXCODE(maxbits);
220 if (maxbits > BITS) {
221 fprintf(stderr,
222 "\n%s: %s: compressed with %d bits, can only handle %d bits\n",
223 progname, ifname, maxbits, BITS);
224 exit_code = ERROR;
225 return ERROR;
227 rsize = insize;
228 maxcode = MAXCODE(n_bits = INIT_BITS)-1;
229 bitmask = (1<<n_bits)-1;
230 oldcode = -1;
231 finchar = 0;
232 outpos = 0;
233 posbits = inptr<<3;
235 free_ent = ((block_mode) ? FIRST : 256);
237 clear_tab_prefixof(); /* Initialize the first 256 entries in the table. */
239 for (code = 255 ; code >= 0 ; --code) {
240 tab_suffixof(code) = (char_type)code;
242 do {
243 REG1 int i;
244 int e;
245 int o;
247 resetbuf:
248 e = insize-(o = (posbits>>3));
250 for (i = 0 ; i < e ; ++i) {
251 inbuf[i] = inbuf[i+o];
253 insize = e;
254 posbits = 0;
256 if (insize < INBUF_EXTRA) {
257 if ((rsize = read(in, (char*)inbuf+insize, INBUFSIZ)) == EOF) {
258 read_error();
260 insize += rsize;
261 bytes_in += (ulg)rsize;
263 inbits = ((rsize != 0) ? ((long)insize - insize%n_bits)<<3 :
264 ((long)insize<<3)-(n_bits-1));
266 while (inbits > posbits) {
267 if (free_ent > maxcode) {
268 posbits = ((posbits-1) +
269 ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
270 ++n_bits;
271 if (n_bits == maxbits) {
272 maxcode = maxmaxcode;
273 } else {
274 maxcode = MAXCODE(n_bits)-1;
276 bitmask = (1<<n_bits)-1;
277 goto resetbuf;
279 input(inbuf,posbits,code,n_bits,bitmask);
280 Tracev((stderr, "%d ", code));
282 if (oldcode == -1) {
283 if (code >= 256) error("corrupt input.");
284 outbuf[outpos++] = (char_type)(finchar = (int)(oldcode=code));
285 continue;
287 if (code == CLEAR && block_mode) {
288 clear_tab_prefixof();
289 free_ent = FIRST - 1;
290 posbits = ((posbits-1) +
291 ((n_bits<<3)-(posbits-1+(n_bits<<3))%(n_bits<<3)));
292 maxcode = MAXCODE(n_bits = INIT_BITS)-1;
293 bitmask = (1<<n_bits)-1;
294 goto resetbuf;
296 incode = code;
297 stackp = de_stack;
299 if (code >= free_ent) { /* Special case for KwKwK string. */
300 if (code > free_ent) {
301 #ifdef DEBUG
302 char_type *p;
304 posbits -= n_bits;
305 p = &inbuf[posbits>>3];
306 fprintf(stderr,
307 "code:%ld free_ent:%ld n_bits:%d insize:%u\n",
308 code, free_ent, n_bits, insize);
309 fprintf(stderr,
310 "posbits:%ld inbuf:%02X %02X %02X %02X %02X\n",
311 posbits, p[-1],p[0],p[1],p[2],p[3]);
312 #endif
314 #ifdef NOT_NEEDED
315 if (!test && outpos > 0) {
316 write_buf(out, (char*)outbuf, outpos);
317 bytes_out += (ulg)outpos;
319 #endif
320 error(to_stdout ? "corrupt input." :
321 "corrupt input. Use zcat to recover some data.");
323 *--stackp = (char_type)finchar;
324 code = oldcode;
327 while ((cmp_code_int)code >= (cmp_code_int)256) {
328 /* Generate output characters in reverse order */
329 *--stackp = tab_suffixof(code);
330 code = tab_prefixof(code);
332 *--stackp = (char_type)(finchar = tab_suffixof(code));
334 /* And put them out in forward order */
336 REG1 int i;
338 if (outpos+(i = (de_stack-stackp)) >= outbuflen) {
339 do {
340 if (i > outbuflen-outpos) i = outbuflen-outpos;
342 if (i > 0) {
343 memcpy(outbuf+outpos, stackp, i);
344 outpos += i;
346 if (outpos >= outbuflen) {
347 #ifdef NOT_NEEDED
348 if (!test) {
349 write_buf(out, (char*)outbuf, outpos);
350 bytes_out += (ulg)outpos;
352 #endif
353 fprintf(stderr, "Error: Chamber overflow!\n");
354 outpos = 0;
356 stackp+= i;
357 } while ((i = (de_stack-stackp)) > 0);
358 } else {
359 memcpy(outbuf+outpos, stackp, i);
360 outpos += i;
364 if ((code = free_ent) < maxmaxcode) { /* Generate the new entry. */
366 tab_prefixof(code) = (unsigned short)oldcode;
367 tab_suffixof(code) = (char_type)finchar;
368 free_ent = code+1;
370 oldcode = incode; /* Remember previous code. */
372 } while (rsize != 0);
374 if (!test && outpos > 0) {
375 write_buf(out, (char*)outbuf, outpos);
376 bytes_out += (ulg)outpos;
378 return OK;