1 /*-----------------------------------------------------------*/
2 /*--- Block recoverer program for bzip2 ---*/
3 /*--- bzip2recover.c ---*/
4 /*-----------------------------------------------------------*/
6 /* ------------------------------------------------------------------
7 This file is part of bzip2/libbzip2, a program and library for
8 lossless, block-sorting data compression.
10 bzip2/libbzip2 version 1.0.5 of 10 December 2007
11 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13 Please read the WARNING, DISCLAIMER and PATENTS sections in the
16 This program is released under the terms of the license contained
18 ------------------------------------------------------------------ */
20 /* This program is a complete hack and should be rewritten properly.
21 It isn't very complicated. */
/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
/* MaybeUInt64 is the widest unsigned integer the compiler is known to
   offer; it is used for bit positions within the input file.
   MaybeUInt64_FMT is the printf conversion matching the chosen type. */
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
   /* "%llu" is the standard conversion for unsigned long long.  The
      former "%Lu" is not portable: the L modifier is only defined
      for long double conversions. */
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   /* No known 64-bit type: fall back to unsigned int. */
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
/* Scalar aliases used throughout this file.  Int32 and Char are
   required by the function signatures and globals below.
   NOTE(review): UInt32/Int32 presume a 32-bit int -- confirm for the
   target platform. */
typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;

/* Boolean stored in one byte, with its two canonical values. */
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)
60 #define BZ_MAX_FILENAME 2000
62 Char inFileName
[BZ_MAX_FILENAME
];
63 Char outFileName
[BZ_MAX_FILENAME
];
64 Char progName
[BZ_MAX_FILENAME
];
66 MaybeUInt64 bytesOut
= 0;
67 MaybeUInt64 bytesIn
= 0;
70 /*---------------------------------------------------*/
71 /*--- Header bytes ---*/
72 /*---------------------------------------------------*/
/* The stream header written to each output file is the three bytes
   B, Z, h followed by BZ_HDR_0 plus a digit. */
#define BZ_HDR_B   0x42   /* 'B' */
#define BZ_HDR_Z   0x5a   /* 'Z' */
#define BZ_HDR_h   0x68   /* 'h' */
#define BZ_HDR_0   0x30   /* '0' */
80 /*---------------------------------------------------*/
81 /*--- I/O errors ---*/
82 /*---------------------------------------------------*/
84 /*---------------------------------------------*/
85 static void readError ( void )
88 "%s: I/O error reading `%s', possible reason follows.\n",
89 progName
, inFileName
);
91 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
97 /*---------------------------------------------*/
98 static void writeError ( void )
101 "%s: I/O error reading `%s', possible reason follows.\n",
102 progName
, inFileName
);
104 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
110 /*---------------------------------------------*/
111 static void mallocFail ( Int32 n
)
114 "%s: malloc failed on request for %d bytes.\n",
116 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
122 /*---------------------------------------------*/
123 static void tooManyBlocks ( Int32 max_handled_blocks
)
126 "%s: `%s' appears to contain more than %d blocks\n",
127 progName
, inFileName
, max_handled_blocks
);
129 "%s: and cannot be handled. To fix, increase\n",
132 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
139 /*---------------------------------------------------*/
140 /*--- Bit stream I/O ---*/
141 /*---------------------------------------------------*/
153 /*---------------------------------------------*/
154 static BitStream
* bsOpenReadStream ( FILE* stream
)
156 BitStream
*bs
= malloc ( sizeof(BitStream
) );
157 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
166 /*---------------------------------------------*/
167 static BitStream
* bsOpenWriteStream ( FILE* stream
)
169 BitStream
*bs
= malloc ( sizeof(BitStream
) );
170 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
179 /*---------------------------------------------*/
180 static void bsPutBit ( BitStream
* bs
, Int32 bit
)
182 if (bs
->buffLive
== 8) {
183 Int32 retVal
= putc ( (UChar
) bs
->buffer
, bs
->handle
);
184 if (retVal
== EOF
) writeError();
187 bs
->buffer
= bit
& 0x1;
189 bs
->buffer
= ( (bs
->buffer
<< 1) | (bit
& 0x1) );
195 /*---------------------------------------------*/
197 Returns 0 or 1, or 2 to indicate EOF.
199 static Int32
bsGetBit ( BitStream
* bs
)
201 if (bs
->buffLive
> 0) {
203 return ( ((bs
->buffer
) >> (bs
->buffLive
)) & 0x1 );
205 Int32 retVal
= getc ( bs
->handle
);
206 if ( retVal
== EOF
) {
207 if (errno
!= 0) readError();
212 return ( ((bs
->buffer
) >> 7) & 0x1 );
217 /*---------------------------------------------*/
218 static void bsClose ( BitStream
* bs
)
222 if ( bs
->mode
== 'w' ) {
223 while ( bs
->buffLive
< 8 ) {
227 retVal
= putc ( (UChar
) (bs
->buffer
), bs
->handle
);
228 if (retVal
== EOF
) writeError();
230 retVal
= fflush ( bs
->handle
);
231 if (retVal
== EOF
) writeError();
233 retVal
= fclose ( bs
->handle
);
235 if (bs
->mode
== 'w') writeError(); else readError();
241 /*---------------------------------------------*/
242 static void bsPutUChar ( BitStream
* bs
, UChar c
)
245 for (i
= 7; i
>= 0; i
--)
246 bsPutBit ( bs
, (((UInt32
) c
) >> i
) & 0x1 );
250 /*---------------------------------------------*/
251 static void bsPutUInt32 ( BitStream
* bs
, UInt32 c
)
255 for (i
= 31; i
>= 0; i
--)
256 bsPutBit ( bs
, (c
>> i
) & 0x1 );
260 /*---------------------------------------------*/
261 static Bool
endsInBz2 ( Char
* name
)
263 Int32 n
= strlen ( name
);
264 if (n
<= 4) return False
;
273 /*---------------------------------------------------*/
275 /*---------------------------------------------------*/
/* Character separating directory components in a path; main() uses
   it to locate the start of the input file's base name.
   This logic isn't really right when it comes to Cygwin.
   NOTE(review): the platform test below was reconstructed -- confirm
   it matches the build configurations you support. */
#if defined(_WIN32) && !defined(__CYGWIN__)
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif
/* 48-bit markers that main() searches for, each split into the top
   16 bits (HI) and the bottom 32 bits (LO).  The first starts a
   compressed block; the second is the end mark. */
#define BLOCK_HEADER_HI   0x00003141UL
#define BLOCK_HEADER_LO   0x59265359UL

#define BLOCK_ENDMARK_HI  0x00001772UL
#define BLOCK_ENDMARK_LO  0x45385090UL

/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
   would have an uncompressed size of at least 40GB, so the chances
   are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000
296 MaybeUInt64 bStart
[BZ_MAX_HANDLED_BLOCKS
];
297 MaybeUInt64 bEnd
[BZ_MAX_HANDLED_BLOCKS
];
298 MaybeUInt64 rbStart
[BZ_MAX_HANDLED_BLOCKS
];
299 MaybeUInt64 rbEnd
[BZ_MAX_HANDLED_BLOCKS
];
301 Int32
main ( Int32 argc
, Char
** argv
)
305 BitStream
* bsIn
, *bsWr
;
306 Int32 b
, wrBlock
, currBlock
, rbCtr
;
307 MaybeUInt64 bitsRead
;
309 UInt32 buffHi
, buffLo
, blockCRC
;
312 strcpy ( progName
, argv
[0] );
313 inFileName
[0] = outFileName
[0] = 0;
316 "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );
319 fprintf ( stderr
, "%s: usage is `%s damaged_file_name'.\n",
320 progName
, progName
);
321 switch (sizeof(MaybeUInt64
)) {
324 "\trestrictions on size of recovered file: None\n");
328 "\trestrictions on size of recovered file: 512 MB\n");
330 "\tto circumvent, recompile with MaybeUInt64 as an\n"
331 "\tunsigned 64-bit int.\n");
335 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
336 "configuration error.\n");
342 if (strlen(argv
[1]) >= BZ_MAX_FILENAME
-20) {
344 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
345 progName
, (int)strlen(argv
[1]) );
349 strcpy ( inFileName
, argv
[1] );
351 inFile
= fopen ( inFileName
, "rb" );
352 if (inFile
== NULL
) {
353 fprintf ( stderr
, "%s: can't read `%s'\n", progName
, inFileName
);
357 bsIn
= bsOpenReadStream ( inFile
);
358 fprintf ( stderr
, "%s: searching for block boundaries ...\n", progName
);
363 bStart
[currBlock
] = 0;
368 b
= bsGetBit ( bsIn
);
371 if (bitsRead
>= bStart
[currBlock
] &&
372 (bitsRead
- bStart
[currBlock
]) >= 40) {
373 bEnd
[currBlock
] = bitsRead
-1;
375 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
376 " to " MaybeUInt64_FMT
" (incomplete)\n",
377 currBlock
, bStart
[currBlock
], bEnd
[currBlock
] );
382 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
383 buffLo
= (buffLo
<< 1) | (b
& 1);
384 if ( ( (buffHi
& 0x0000ffff) == BLOCK_HEADER_HI
385 && buffLo
== BLOCK_HEADER_LO
)
387 ( (buffHi
& 0x0000ffff) == BLOCK_ENDMARK_HI
388 && buffLo
== BLOCK_ENDMARK_LO
)
391 bEnd
[currBlock
] = bitsRead
-49;
396 (bEnd
[currBlock
] - bStart
[currBlock
]) >= 130) {
397 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
398 " to " MaybeUInt64_FMT
"\n",
399 rbCtr
+1, bStart
[currBlock
], bEnd
[currBlock
] );
400 rbStart
[rbCtr
] = bStart
[currBlock
];
401 rbEnd
[rbCtr
] = bEnd
[currBlock
];
404 if (currBlock
>= BZ_MAX_HANDLED_BLOCKS
)
405 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS
);
408 bStart
[currBlock
] = bitsRead
;
414 /*-- identified blocks run from 1 to rbCtr inclusive. --*/
418 "%s: sorry, I couldn't find any block boundaries.\n",
423 fprintf ( stderr
, "%s: splitting into blocks\n", progName
);
425 inFile
= fopen ( inFileName
, "rb" );
426 if (inFile
== NULL
) {
427 fprintf ( stderr
, "%s: can't open `%s'\n", progName
, inFileName
);
430 bsIn
= bsOpenReadStream ( inFile
);
432 /*-- placate gcc's dataflow analyser --*/
433 blockCRC
= 0; bsWr
= 0;
441 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
442 buffLo
= (buffLo
<< 1) | (b
& 1);
443 if (bitsRead
== 47+rbStart
[wrBlock
])
444 blockCRC
= (buffHi
<< 16) | (buffLo
>> 16);
446 if (outFile
!= NULL
&& bitsRead
>= rbStart
[wrBlock
]
447 && bitsRead
<= rbEnd
[wrBlock
]) {
448 bsPutBit ( bsWr
, b
);
453 if (bitsRead
== rbEnd
[wrBlock
]+1) {
454 if (outFile
!= NULL
) {
455 bsPutUChar ( bsWr
, 0x17 ); bsPutUChar ( bsWr
, 0x72 );
456 bsPutUChar ( bsWr
, 0x45 ); bsPutUChar ( bsWr
, 0x38 );
457 bsPutUChar ( bsWr
, 0x50 ); bsPutUChar ( bsWr
, 0x90 );
458 bsPutUInt32 ( bsWr
, blockCRC
);
461 if (wrBlock
>= rbCtr
) break;
464 if (bitsRead
== rbStart
[wrBlock
]) {
465 /* Create the output file name, correctly handling leading paths.
466 (31.10.2001 by Sergey E. Kusikov) */
469 for (k
= 0; k
< BZ_MAX_FILENAME
; k
++)
471 strcpy (outFileName
, inFileName
);
472 split
= strrchr (outFileName
, BZ_SPLIT_SYM
);
478 /* Now split points to the start of the basename. */
479 ofs
= split
- outFileName
;
480 sprintf (split
, "rec%5d", wrBlock
+1);
481 for (p
= split
; *p
!= 0; p
++) if (*p
== ' ') *p
= '0';
482 strcat (outFileName
, inFileName
+ ofs
);
484 if ( !endsInBz2(outFileName
)) strcat ( outFileName
, ".bz2" );
486 fprintf ( stderr
, " writing block %d to `%s' ...\n",
487 wrBlock
+1, outFileName
);
489 outFile
= fopen ( outFileName
, "wb" );
490 if (outFile
== NULL
) {
491 fprintf ( stderr
, "%s: can't write `%s'\n",
492 progName
, outFileName
);
495 bsWr
= bsOpenWriteStream ( outFile
);
496 bsPutUChar ( bsWr
, BZ_HDR_B
);
497 bsPutUChar ( bsWr
, BZ_HDR_Z
);
498 bsPutUChar ( bsWr
, BZ_HDR_h
);
499 bsPutUChar ( bsWr
, BZ_HDR_0
+ 9 );
500 bsPutUChar ( bsWr
, 0x31 ); bsPutUChar ( bsWr
, 0x41 );
501 bsPutUChar ( bsWr
, 0x59 ); bsPutUChar ( bsWr
, 0x26 );
502 bsPutUChar ( bsWr
, 0x53 ); bsPutUChar ( bsWr
, 0x59 );
506 fprintf ( stderr
, "%s: finished\n", progName
);
512 /*-----------------------------------------------------------*/
513 /*--- end bzip2recover.c ---*/
514 /*-----------------------------------------------------------*/