2 /* Slightly modified from its original form so as not to exit the
3 * program on errors. The resulting file remains in the public
4 * domain for all to use. */
6 /* --- GZIP file format uncompression routines --- */
8 /* The following routines (notably the unzip()) function below
9 * uncompress gzipped data. They are terribly slow at the task, but
10 * it is presumed that they work reasonably well. They don't do any
11 * error checking, but they're probably not too vulnerable to buggy
12 * data either. Another important limitation (but it would be pretty
13 * easy to get around) is that the data must reside in memory, it is
14 * not read as a stream. They have been very little tested. Anyway,
15 * whatever these functions are good for, I put them in the public
16 * domain. -- David Madore <david.madore@ens.fr> 1999/11/21 */
19 peek_bits (const unsigned char *data
, long p
, int q
)
20 /* Read q bits starting from bit p from the data pointed to by
21 * data. Data is in little-endian format. */
24 int cnt
; /* Number of bits already placed in answer */
25 char ob
, lb
; /* Offset and length of bit field within current byte */
28 for ( cnt
=0 ; cnt
<q
; /* cnt updated in body */ )
34 answer
|= ((unsigned int)((data
[(p
+cnt
)/8]>>ob
)&((1U<<lb
)-1)))<<cnt
;
41 read_bits (const unsigned char *data
, long *p
, int q
)
42 /* Read q bits as per peek_bits(), but also increase p by q. */
46 answer
= peek_bits (data
, *p
, q
);
52 make_code_table (const char size_table
[], int table_length
,
53 unsigned int code_table
[], int maxbits
)
54 /* Make a code table from a length table. See rfc1951, section
55 * 3.2.2, for details on what this means. The size_table
56 * contains the length of the Huffman codes for each letter, and
57 * the code_table receives the computed codes themselves.
58 * table_length is the size of the tables (alphabet length) and
59 * maxbits is the maximal allowed code length. */
65 for ( i
=1 ; i
<=maxbits
; i
++ )
67 for ( j
=0 ; j
<table_length
; j
++ )
69 if ( size_table
[j
]==i
)
70 code_table
[j
] = code
++;
77 decode_one (const unsigned char *data
, long *p
,
78 const char size_table
[], int table_length
,
79 const unsigned int code_table
[], int maxbits
)
80 /* Decode one alphabet letter from the data, starting at bit p
81 * (which will be increased by the appropriate amount) using
82 * size_table and code_table to decipher the Huffman encoding. */
88 /* Read as many bits as are likely to be necessary - backward, of
90 for ( i
=0 ; i
<maxbits
; i
++ )
91 code
= (code
<<1) + peek_bits (data
, (*p
)+i
, 1);
92 /* Now examine each symbol of the table to find one that matches the
93 * first bits of the code read. */
94 for ( j
=0 ; j
<table_length
; j
++ )
97 && ( (code
>>(maxbits
-size_table
[j
])) == code_table
[j
] ) )
106 /* I don't know what these should be. The rfc1951 doesn't seem to say
107 * (it only mentions them in the last paragraph of section 3.2.1). 15
108 * is almost certainly safe, and it is the largest I can put given the
109 * constraints on the size of integers in the C standard. */
110 #define CLEN_MAXBITS 15
111 #define HLIT_MAXBITS 15
112 #define HDIST_MAXBITS 15
114 /* The magical table sizes... */
115 #define CLEN_TSIZE 19
116 #define HLIT_TSIZE 288
117 #define HDIST_TSIZE 30
120 get_tables (const unsigned char *data
, long *p
,
121 char hlit_size_table
[HLIT_TSIZE
],
122 unsigned int hlit_code_table
[HLIT_TSIZE
],
123 char hdist_size_table
[HDIST_TSIZE
],
124 unsigned int hdist_code_table
[HDIST_TSIZE
])
125 /* Fill the Huffman tables (first the code lengths table, and
126 * then, using it, the literal/length table and the distance
127 * table). See section 3.2.7 of rfc1951 for details. */
129 char hlit
, hdist
, hclen
;
130 const int clen_weird_tangle
[CLEN_TSIZE
]
131 = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
132 char clen_size_table
[CLEN_TSIZE
];
133 unsigned int clen_code_table
[CLEN_TSIZE
];
136 int remainder
; /* See note at end of section 3.2.7 of rfc1951. */
139 hlit
= read_bits (data
, p
, 5);
140 hdist
= read_bits (data
, p
, 5);
141 hclen
= read_bits (data
, p
, 4);
142 for ( j
=0 ; j
<4+hclen
; j
++ )
143 clen_size_table
[clen_weird_tangle
[j
]]
144 = read_bits (data
, p
, 3);
145 for ( ; j
<CLEN_TSIZE
; j
++ )
146 clen_size_table
[clen_weird_tangle
[j
]] = 0;
147 make_code_table (clen_size_table
, CLEN_TSIZE
,
148 clen_code_table
, CLEN_MAXBITS
);
151 for ( j
=0 ; j
<257+hlit
; j
++ )
153 b
= decode_one (data
, p
, clen_size_table
, CLEN_TSIZE
,
154 clen_code_table
, CLEN_MAXBITS
);
155 if ( b
<0 ) return -1;
157 hlit_size_table
[j
] = b
;
162 k
= read_bits (data
, p
, 2);
163 for ( l
=0 ; l
<k
+3 && j
+l
<257+hlit
; l
++ )
164 hlit_size_table
[j
+l
] = hlit_size_table
[j
-1];
166 remainder
= k
+3-l
; /* THIS IS SO UGLY! */
167 rem_val
= hlit_size_table
[j
-1];
173 k
= read_bits (data
, p
, 3);
174 for ( l
=0 ; l
<k
+3 && j
+l
<257+hlit
; l
++ )
175 hlit_size_table
[j
+l
] = 0;
184 k
= read_bits (data
, p
, 7);
185 for ( l
=0 ; l
<k
+11 && j
+l
<257+hlit
; l
++ )
186 hlit_size_table
[j
+l
] = 0;
192 for ( ; j
<HLIT_TSIZE
; j
++ )
193 hlit_size_table
[j
] = 0;
194 make_code_table (hlit_size_table
, HLIT_TSIZE
,
195 hlit_code_table
, HLIT_MAXBITS
);
196 for ( j
=0 ; j
<remainder
; j
++ )
197 hdist_size_table
[j
] = rem_val
;
198 for ( ; j
<1+hdist
; j
++ )
199 /* Can you spell: ``copy-paste''? */
201 b
= decode_one (data
, p
, clen_size_table
, CLEN_TSIZE
,
202 clen_code_table
, CLEN_MAXBITS
);
203 if ( b
<0 ) return -1;
205 hdist_size_table
[j
] = b
;
210 k
= read_bits (data
, p
, 2);
211 for ( l
=0 ; l
<k
+3 && j
+l
<1+hdist
; l
++ )
212 hdist_size_table
[j
+l
] = hdist_size_table
[j
-1];
219 k
= read_bits (data
, p
, 3);
220 for ( l
=0 ; l
<k
+3 && j
+l
<1+hdist
; l
++ )
221 hdist_size_table
[j
+l
] = 0;
228 k
= read_bits (data
, p
, 7);
229 for ( l
=0 ; l
<k
+11 && j
+l
<1+hdist
; l
++ )
230 hdist_size_table
[j
+l
] = 0;
234 for ( ; j
<HDIST_TSIZE
; j
++ )
235 hdist_size_table
[j
] = 0;
236 make_code_table (hdist_size_table
, HDIST_TSIZE
,
237 hdist_code_table
, HDIST_MAXBITS
);
241 /* The (circular) output buffer. This lets us track
244 /* Minimal buffer size. Also the only useful value. */
245 #define BUFFER_SIZE 32768
247 /* Pointer to the character to be added to the buffer */
248 static unsigned int buffer_ptr
= 0;
250 /* The buffer itself */
251 static unsigned char buffer
[BUFFER_SIZE
];
254 pushout (unsigned char ch
)
255 /* Store one byte in the output buffer so it may be retrieved if
256 * it is referenced again. */
258 buffer
[buffer_ptr
++] = ch
;
259 buffer_ptr
%= BUFFER_SIZE
;
263 pushin (unsigned int dist
)
264 /* Retrieve one byte, dist bytes away, from the output buffer. */
266 return buffer
[(buffer_ptr
+(BUFFER_SIZE
-dist
))%BUFFER_SIZE
];
270 get_data (const unsigned char *data
, long *p
,
271 const char hlit_size_table
[HLIT_TSIZE
],
272 const unsigned int hlit_code_table
[HLIT_TSIZE
],
273 const char hdist_size_table
[HDIST_TSIZE
],
274 const unsigned int hdist_code_table
[HDIST_TSIZE
],
275 void (* callback
) (unsigned char d
))
276 /* Do the actual uncompressing. Call callback on each character
282 b
= decode_one (data
, p
, hlit_size_table
, HLIT_TSIZE
,
283 hlit_code_table
, HLIT_MAXBITS
);
284 if ( b
<0 ) return -1;
288 pushout ((unsigned char) b
);
289 callback ((unsigned char) b
);
298 unsigned int length
, dist
;
303 case 257: length
= 3; break;
304 case 258: length
= 4; break;
305 case 259: length
= 5; break;
306 case 260: length
= 6; break;
307 case 261: length
= 7; break;
308 case 262: length
= 8; break;
309 case 263: length
= 9; break;
310 case 264: length
= 10; break;
311 case 265: length
= 11 + read_bits (data
, p
, 1); break;
312 case 266: length
= 13 + read_bits (data
, p
, 1); break;
313 case 267: length
= 15 + read_bits (data
, p
, 1); break;
314 case 268: length
= 17 + read_bits (data
, p
, 1); break;
315 case 269: length
= 19 + read_bits (data
, p
, 2); break;
316 case 270: length
= 23 + read_bits (data
, p
, 2); break;
317 case 271: length
= 27 + read_bits (data
, p
, 2); break;
318 case 272: length
= 31 + read_bits (data
, p
, 2); break;
319 case 273: length
= 35 + read_bits (data
, p
, 3); break;
320 case 274: length
= 43 + read_bits (data
, p
, 3); break;
321 case 275: length
= 51 + read_bits (data
, p
, 3); break;
322 case 276: length
= 59 + read_bits (data
, p
, 3); break;
323 case 277: length
= 67 + read_bits (data
, p
, 4); break;
324 case 278: length
= 83 + read_bits (data
, p
, 4); break;
325 case 279: length
= 99 + read_bits (data
, p
, 4); break;
326 case 280: length
= 115 + read_bits (data
, p
, 4); break;
327 case 281: length
= 131 + read_bits (data
, p
, 5); break;
328 case 282: length
= 163 + read_bits (data
, p
, 5); break;
329 case 283: length
= 195 + read_bits (data
, p
, 5); break;
330 case 284: length
= 227 + read_bits (data
, p
, 5); break;
331 case 285: length
= 258; break;
335 bb
= decode_one (data
, p
, hdist_size_table
, HDIST_TSIZE
,
336 hdist_code_table
, HDIST_MAXBITS
);
339 case 0: dist
= 1; break;
340 case 1: dist
= 2; break;
341 case 2: dist
= 3; break;
342 case 3: dist
= 4; break;
343 case 4: dist
= 5 + read_bits (data
, p
, 1); break;
344 case 5: dist
= 7 + read_bits (data
, p
, 1); break;
345 case 6: dist
= 9 + read_bits (data
, p
, 2); break;
346 case 7: dist
= 13 + read_bits (data
, p
, 2); break;
347 case 8: dist
= 17 + read_bits (data
, p
, 3); break;
348 case 9: dist
= 25 + read_bits (data
, p
, 3); break;
349 case 10: dist
= 33 + read_bits (data
, p
, 4); break;
350 case 11: dist
= 49 + read_bits (data
, p
, 4); break;
351 case 12: dist
= 65 + read_bits (data
, p
, 5); break;
352 case 13: dist
= 97 + read_bits (data
, p
, 5); break;
353 case 14: dist
= 129 + read_bits (data
, p
, 6); break;
354 case 15: dist
= 193 + read_bits (data
, p
, 6); break;
355 case 16: dist
= 257 + read_bits (data
, p
, 7); break;
356 case 17: dist
= 385 + read_bits (data
, p
, 7); break;
357 case 18: dist
= 513 + read_bits (data
, p
, 8); break;
358 case 19: dist
= 769 + read_bits (data
, p
, 8); break;
359 case 20: dist
= 1025 + read_bits (data
, p
, 9); break;
360 case 21: dist
= 1537 + read_bits (data
, p
, 9); break;
361 case 22: dist
= 2049 + read_bits (data
, p
, 10); break;
362 case 23: dist
= 3073 + read_bits (data
, p
, 10); break;
363 case 24: dist
= 4097 + read_bits (data
, p
, 11); break;
364 case 25: dist
= 6145 + read_bits (data
, p
, 11); break;
365 case 26: dist
= 8193 + read_bits (data
, p
, 12); break;
366 case 27: dist
= 12289 + read_bits (data
, p
, 12); break;
367 case 28: dist
= 16385 + read_bits (data
, p
, 13); break;
368 case 29: dist
= 24577 + read_bits (data
, p
, 13); break;
372 for ( l
=0 ; l
<length
; l
++ )
386 inflate (const unsigned char *data
, long *p
,
387 void (* callback
) (unsigned char d
))
388 /* Main uncompression function for the deflate method */
391 char hlit_size_table
[HLIT_TSIZE
];
392 unsigned int hlit_code_table
[HLIT_TSIZE
];
393 char hdist_size_table
[HDIST_TSIZE
];
394 unsigned int hdist_code_table
[HDIST_TSIZE
];
397 blast
= read_bits (data
, p
, 1);
398 btype
= read_bits (data
, p
, 2);
399 if ( btype
== 1 || btype
== 2 )
403 /* Dynamic Huffman tables */
404 if (get_tables (data
, p
,
405 hlit_size_table
, hlit_code_table
,
406 hdist_size_table
, hdist_code_table
) < 0) return -1;
409 /* Fixed Huffman codes */
413 for ( j
=0 ; j
<144 ; j
++ )
414 hlit_size_table
[j
] = 8;
415 for ( ; j
<256 ; j
++ )
416 hlit_size_table
[j
] = 9;
417 for ( ; j
<280 ; j
++ )
418 hlit_size_table
[j
] = 7;
419 for ( ; j
<HLIT_TSIZE
; j
++ )
420 hlit_size_table
[j
] = 8;
421 make_code_table (hlit_size_table
, HLIT_TSIZE
,
422 hlit_code_table
, HLIT_MAXBITS
);
423 for ( j
=0 ; j
<HDIST_TSIZE
; j
++ )
424 hdist_size_table
[j
] = 5;
425 make_code_table (hdist_size_table
, HDIST_TSIZE
,
426 hdist_code_table
, HDIST_MAXBITS
);
428 if (get_data (data
, p
,
429 hlit_size_table
, hlit_code_table
,
430 hdist_size_table
, hdist_code_table
,
431 callback
) < 0) return -1;;
433 else if ( btype
== 0 )
434 /* Non compressed block */
436 unsigned int len
, nlen
;
440 *p
= (*p
+7)/8; /* Jump to next byte boundary */
441 len
= read_bits (data
, p
, 16);
442 nlen
= read_bits (data
, p
, 16);
443 for ( l
=0 ; l
<len
; l
++ )
445 b
= read_bits (data
, p
, 8);
460 unzip (const unsigned char *data
, long *p
,
461 void (* callback
) (unsigned char d
))
462 /* Uncompress gzipped data. data is a pointer to the data, p is
463 * a pointer to a long that is initialized to 0 (unless for some
464 * reason you want to start uncompressing further down the data),
465 * and callback is a function taking an unsigned char and
466 * returning void that will be called successively for every
467 * uncompressed byte. */
469 unsigned char cm
, flg
;
471 if ( read_bits (data
, p
, 8) != 0x1f
472 || read_bits (data
, p
, 8) != 0x8b )
476 cm
= read_bits (data
, p
, 8);
481 flg
= read_bits (data
, p
, 8);
483 /* fprintf (stderr, "Warning: unknown bits are set in flags.\n") */ ;
484 read_bits (data
, p
, 32); /* Ignore modification time */
485 read_bits (data
, p
, 8); /* Ignore extra flags */
486 read_bits (data
, p
, 8); /* Ignore OS type */
489 /* Skip over extra data */
492 xlen
= read_bits (data
, p
, 16);
493 *p
+= ((long)xlen
)*8;
497 /* Skip over file name */
498 while ( read_bits (data
, p
, 8) );
502 /* Skip over comment */
503 while ( read_bits (data
, p
, 8) );
507 read_bits (data
, p
, 16);
508 return inflate (data
, p
, callback
);
509 /* CRC32 and ISIZE are at the end. We don't even bother to look at