Sync usage with man page.
[netbsd-mini2440.git] / dist / bzip2 / bzip2recover.c
blobcfad9842c611acea024feebbcdb8610ca0db5c43
1 /* $NetBSD: bzip2recover.c,v 1.6 2008/03/18 14:47:07 christos Exp $ */
4 /*-----------------------------------------------------------*/
5 /*--- Block recoverer program for bzip2 ---*/
6 /*--- bzip2recover.c ---*/
7 /*-----------------------------------------------------------*/
9 /* ------------------------------------------------------------------
10 This file is part of bzip2/libbzip2, a program and library for
11 lossless, block-sorting data compression.
13 bzip2/libbzip2 version 1.0.5 of 10 December 2007
14 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
16 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 README file.
19 This program is released under the terms of the license contained
20 in the file LICENSE.
21 ------------------------------------------------------------------ */
23 /* This program is a complete hack and should be rewritten properly.
24 It isn't very complicated. */
26 #include <stdio.h>
27 #include <errno.h>
28 #include <stdlib.h>
29 #include <string.h>
/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms (and any other C99 compiler), we take
   advantage of the 64-bit int support to circumvent this problem.
   Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.

   MaybeUInt64_FMT is the printf conversion matching MaybeUInt64.
   "%llu" is the ISO C spelling for unsigned long long; the "L"
   length modifier is only defined for floating-point conversions.
*/
#if defined(__GNUC__) \
    || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#elif defined(_MSC_VER)
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
/* Short aliases for the primitive types used throughout this file. */
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  int            Int32;
typedef  unsigned int   UInt32;

/* Boolean stored in a single unsigned byte. */
typedef  unsigned char  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
63 #define BZ_MAX_FILENAME 2000
65 Char inFileName[BZ_MAX_FILENAME];
66 Char outFileName[BZ_MAX_FILENAME];
67 Char progName[BZ_MAX_FILENAME];
69 MaybeUInt64 bytesOut = 0;
70 MaybeUInt64 bytesIn = 0;
72 /*---------------------------------------------------*/
73 /*--- Bit stream I/O ---*/
74 /*---------------------------------------------------*/
76 typedef
77 struct {
78 FILE* handle;
79 Int32 buffer;
80 Int32 buffLive;
81 Char mode;
83 BitStream;
85 static void readError ( void );
86 static void writeError ( void );
87 static void mallocFail ( Int32 n );
88 static BitStream* bsOpenReadStream ( FILE* stream );
89 static BitStream* bsOpenWriteStream ( FILE* stream );
90 static void bsPutBit ( BitStream* bs, Int32 bit );
91 static Int32 bsGetBit ( BitStream* bs );
92 static void bsClose ( BitStream* bs );
93 static void bsPutUChar ( BitStream* bs, UChar c );
94 static void bsPutUInt32 ( BitStream* bs, UInt32 c );
95 static Bool endsInBz2 ( Char* name );
96 static void tooManyBlocks ( Int32 max_handled_blocks );
99 /*---------------------------------------------------*/
100 /*--- Header bytes ---*/
101 /*---------------------------------------------------*/
/* Bytes of the stream header "BZh" followed by the level digit base '0'. */
enum {
   BZ_HDR_B = 0x42,   /* 'B' */
   BZ_HDR_Z = 0x5a,   /* 'Z' */
   BZ_HDR_h = 0x68,   /* 'h' */
   BZ_HDR_0 = 0x30    /* '0' */
};
109 /*---------------------------------------------------*/
110 /*--- I/O errors ---*/
111 /*---------------------------------------------------*/
113 /*---------------------------------------------*/
114 static void readError ( void )
116 fprintf ( stderr,
117 "%s: I/O error reading `%s', possible reason follows.\n",
118 progName, inFileName );
119 perror ( progName );
120 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
121 progName );
122 exit ( 1 );
126 /*---------------------------------------------*/
127 static void writeError ( void )
129 fprintf ( stderr,
130 "%s: I/O error reading `%s', possible reason follows.\n",
131 progName, inFileName );
132 perror ( progName );
133 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
134 progName );
135 exit ( 1 );
139 /*---------------------------------------------*/
140 static void mallocFail ( Int32 n )
142 fprintf ( stderr,
143 "%s: malloc failed on request for %d bytes.\n",
144 progName, n );
145 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
146 progName );
147 exit ( 1 );
151 /*---------------------------------------------*/
152 static void tooManyBlocks ( Int32 max_handled_blocks )
154 fprintf ( stderr,
155 "%s: `%s' appears to contain more than %d blocks\n",
156 progName, inFileName, max_handled_blocks );
157 fprintf ( stderr,
158 "%s: and cannot be handled. To fix, increase\n",
159 progName );
160 fprintf ( stderr,
161 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
162 progName );
163 exit ( 1 );
168 /*---------------------------------------------*/
169 static BitStream* bsOpenReadStream ( FILE* stream )
171 BitStream *bs = malloc ( sizeof(BitStream) );
172 if (bs == NULL) mallocFail ( sizeof(BitStream) );
173 bs->handle = stream;
174 bs->buffer = 0;
175 bs->buffLive = 0;
176 bs->mode = 'r';
177 return bs;
181 /*---------------------------------------------*/
182 static BitStream* bsOpenWriteStream ( FILE* stream )
184 BitStream *bs = malloc ( sizeof(BitStream) );
185 if (bs == NULL) mallocFail ( sizeof(BitStream) );
186 bs->handle = stream;
187 bs->buffer = 0;
188 bs->buffLive = 0;
189 bs->mode = 'w';
190 return bs;
194 /*---------------------------------------------*/
195 static void bsPutBit ( BitStream* bs, Int32 bit )
197 if (bs->buffLive == 8) {
198 Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
199 if (retVal == EOF) writeError();
200 bytesOut++;
201 bs->buffLive = 1;
202 bs->buffer = bit & 0x1;
203 } else {
204 bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
205 bs->buffLive++;
210 /*---------------------------------------------*/
211 /*--
212 Returns 0 or 1, or 2 to indicate EOF.
213 --*/
214 static Int32 bsGetBit ( BitStream* bs )
216 if (bs->buffLive > 0) {
217 bs->buffLive --;
218 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
219 } else {
220 Int32 retVal = getc ( bs->handle );
221 if ( retVal == EOF ) {
222 if (errno != 0) readError();
223 return 2;
225 bs->buffLive = 7;
226 bs->buffer = retVal;
227 return ( ((bs->buffer) >> 7) & 0x1 );
232 /*---------------------------------------------*/
233 static void bsClose ( BitStream* bs )
235 Int32 retVal;
237 if ( bs->mode == 'w' ) {
238 while ( bs->buffLive < 8 ) {
239 bs->buffLive++;
240 bs->buffer <<= 1;
242 retVal = putc ( (UChar) (bs->buffer), bs->handle );
243 if (retVal == EOF) writeError();
244 bytesOut++;
245 retVal = fflush ( bs->handle );
246 if (retVal == EOF) writeError();
248 retVal = fclose ( bs->handle );
249 if (retVal == EOF) {
250 if (bs->mode == 'w') writeError(); else readError();
252 free ( bs );
256 /*---------------------------------------------*/
257 static void bsPutUChar ( BitStream* bs, UChar c )
259 Int32 i;
260 for (i = 7; i >= 0; i--)
261 bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
265 /*---------------------------------------------*/
266 static void bsPutUInt32 ( BitStream* bs, UInt32 c )
268 Int32 i;
270 for (i = 31; i >= 0; i--)
271 bsPutBit ( bs, (c >> i) & 0x1 );
275 /*---------------------------------------------*/
276 static Bool endsInBz2 ( Char* name )
278 Int32 n = strlen ( name );
279 if (n <= 4) return False;
280 return
281 (name[n-4] == '.' &&
282 name[n-3] == 'b' &&
283 name[n-2] == 'z' &&
284 name[n-1] == '2');
288 /*---------------------------------------------------*/
289 /*--- ---*/
290 /*---------------------------------------------------*/
292 /* This logic isn't really right when it comes to Cygwin. */
293 #ifdef _WIN32
294 # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */
295 #else
296 # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */
297 #endif
299 #define BLOCK_HEADER_HI 0x00003141UL
300 #define BLOCK_HEADER_LO 0x59265359UL
302 #define BLOCK_ENDMARK_HI 0x00001772UL
303 #define BLOCK_ENDMARK_LO 0x45385090UL
305 /* Increase if necessary. However, a .bz2 file with > 50000 blocks
306 would have an uncompressed size of at least 40GB, so the chances
307 are low you'll need to up this.
309 #define BZ_MAX_HANDLED_BLOCKS 50000
311 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
312 MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
313 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
314 MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
316 Int32 main ( Int32 argc, Char** argv )
318 FILE* inFile;
319 FILE* outFile;
320 BitStream* bsIn, *bsWr;
321 Int32 b, wrBlock, currBlock, rbCtr;
322 MaybeUInt64 bitsRead;
324 UInt32 buffHi, buffLo, blockCRC;
325 Char* p;
327 strcpy ( progName, argv[0] );
328 inFileName[0] = outFileName[0] = 0;
330 fprintf ( stderr,
331 "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );
333 if (argc != 2) {
334 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
335 progName, progName );
336 switch (sizeof(MaybeUInt64)) {
337 case 8:
338 fprintf(stderr,
339 "\trestrictions on size of recovered file: None\n");
340 break;
341 case 4:
342 fprintf(stderr,
343 "\trestrictions on size of recovered file: 512 MB\n");
344 fprintf(stderr,
345 "\tto circumvent, recompile with MaybeUInt64 as an\n"
346 "\tunsigned 64-bit int.\n");
347 break;
348 default:
349 fprintf(stderr,
350 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
351 "configuration error.\n");
352 break;
354 exit(1);
357 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
358 fprintf ( stderr,
359 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
360 progName, (int)strlen(argv[1]) );
361 exit(1);
364 strcpy ( inFileName, argv[1] );
366 inFile = fopen ( inFileName, "rb" );
367 if (inFile == NULL) {
368 fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
369 exit(1);
372 bsIn = bsOpenReadStream ( inFile );
373 fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
375 bitsRead = 0;
376 buffHi = buffLo = 0;
377 currBlock = 0;
378 bStart[currBlock] = 0;
380 rbCtr = 0;
382 while (True) {
383 b = bsGetBit ( bsIn );
384 bitsRead++;
385 if (b == 2) {
386 if (bitsRead >= bStart[currBlock] &&
387 (bitsRead - bStart[currBlock]) >= 40) {
388 bEnd[currBlock] = bitsRead-1;
389 if (currBlock > 0)
390 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
391 " to " MaybeUInt64_FMT " (incomplete)\n",
392 currBlock, bStart[currBlock], bEnd[currBlock] );
393 } else
394 currBlock--;
395 break;
397 buffHi = (buffHi << 1) | (buffLo >> 31);
398 buffLo = (buffLo << 1) | (b & 1);
399 if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
400 && buffLo == BLOCK_HEADER_LO)
402 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
403 && buffLo == BLOCK_ENDMARK_LO)
405 if (bitsRead > 49) {
406 bEnd[currBlock] = bitsRead-49;
407 } else {
408 bEnd[currBlock] = 0;
410 if (currBlock > 0 &&
411 (bEnd[currBlock] - bStart[currBlock]) >= 130) {
412 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
413 " to " MaybeUInt64_FMT "\n",
414 rbCtr+1, bStart[currBlock], bEnd[currBlock] );
415 rbStart[rbCtr] = bStart[currBlock];
416 rbEnd[rbCtr] = bEnd[currBlock];
417 rbCtr++;
419 if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
420 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
421 currBlock++;
423 bStart[currBlock] = bitsRead;
427 bsClose ( bsIn );
429 /*-- identified blocks run from 1 to rbCtr inclusive. --*/
431 if (rbCtr < 1) {
432 fprintf ( stderr,
433 "%s: sorry, I couldn't find any block boundaries.\n",
434 progName );
435 exit(1);
438 fprintf ( stderr, "%s: splitting into blocks\n", progName );
440 inFile = fopen ( inFileName, "rb" );
441 if (inFile == NULL) {
442 fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
443 exit(1);
445 bsIn = bsOpenReadStream ( inFile );
447 /*-- placate gcc's dataflow analyser --*/
448 blockCRC = 0; bsWr = 0;
450 bitsRead = 0;
451 outFile = NULL;
452 wrBlock = 0;
453 while (True) {
454 b = bsGetBit(bsIn);
455 if (b == 2) break;
456 buffHi = (buffHi << 1) | (buffLo >> 31);
457 buffLo = (buffLo << 1) | (b & 1);
458 if (bitsRead == 47+rbStart[wrBlock])
459 blockCRC = (buffHi << 16) | (buffLo >> 16);
461 if (outFile != NULL && bitsRead >= rbStart[wrBlock]
462 && bitsRead <= rbEnd[wrBlock]) {
463 bsPutBit ( bsWr, b );
466 bitsRead++;
468 if (bitsRead == rbEnd[wrBlock]+1) {
469 if (outFile != NULL) {
470 bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
471 bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
472 bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
473 bsPutUInt32 ( bsWr, blockCRC );
474 bsClose ( bsWr );
476 if (wrBlock >= rbCtr) break;
477 wrBlock++;
478 } else
479 if (bitsRead == rbStart[wrBlock]) {
480 /* Create the output file name, correctly handling leading paths.
481 (31.10.2001 by Sergey E. Kusikov) */
482 Char* split;
483 Int32 ofs, k;
484 for (k = 0; k < BZ_MAX_FILENAME; k++)
485 outFileName[k] = 0;
486 strcpy (outFileName, inFileName);
487 split = strrchr (outFileName, BZ_SPLIT_SYM);
488 if (split == NULL) {
489 split = outFileName;
490 } else {
491 ++split;
493 /* Now split points to the start of the basename. */
494 ofs = split - outFileName;
495 sprintf (split, "rec%5d", wrBlock+1);
496 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
497 strcat (outFileName, inFileName + ofs);
499 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
501 fprintf ( stderr, " writing block %d to `%s' ...\n",
502 wrBlock+1, outFileName );
504 outFile = fopen ( outFileName, "wb" );
505 if (outFile == NULL) {
506 fprintf ( stderr, "%s: can't write `%s'\n",
507 progName, outFileName );
508 exit(1);
510 bsWr = bsOpenWriteStream ( outFile );
511 bsPutUChar ( bsWr, BZ_HDR_B );
512 bsPutUChar ( bsWr, BZ_HDR_Z );
513 bsPutUChar ( bsWr, BZ_HDR_h );
514 bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
515 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
516 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
517 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
521 fprintf ( stderr, "%s: finished\n", progName );
522 return 0;
527 /*-----------------------------------------------------------*/
528 /*--- end bzip2recover.c ---*/
529 /*-----------------------------------------------------------*/