1 /*-----------------------------------------------------------*/
2 /*--- Block recoverer program for bzip2 ---*/
3 /*--- bzip2recover.c ---*/
4 /*-----------------------------------------------------------*/
6 /* ------------------------------------------------------------------
7 This file is part of bzip2/libbzip2, a program and library for
8 lossless, block-sorting data compression.
10 bzip2/libbzip2 version 1.0.5 of 10 December 2007
11 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13 Please read the WARNING, DISCLAIMER and PATENTS sections in the
16 This program is released under the terms of the license contained
18 ------------------------------------------------------------------ */
20 /* This program is a complete hack and should be rewritten properly.
21 It isn't very complicated. */
31 /* This program records bit locations in the file to be recovered.
32 That means that if 64-bit ints are not supported, we will not
33 be able to recover .bz2 files over 512MB (2^32 bits) long.
34 On GNU supported platforms, we take advantage of the 64-bit
35 int support to circumvent this problem. Ditto MSVC.
37 This change occurred in version 1.0.2; all prior versions have
41 typedef unsigned long long int MaybeUInt64
;
42 # define MaybeUInt64_FMT "%Lu"
45 typedef unsigned __int64 MaybeUInt64
;
46 # define MaybeUInt64_FMT "%I64u"
48 typedef unsigned int MaybeUInt64
;
49 # define MaybeUInt64_FMT "%u"
53 typedef unsigned int UInt32
;
55 typedef unsigned char UChar
;
57 typedef unsigned char Bool
;
58 #define True ((Bool)1)
59 #define False ((Bool)0)
62 #define BZ_MAX_FILENAME 2000
64 Char inFileName
[BZ_MAX_FILENAME
];
65 Char outFileName
[BZ_MAX_FILENAME
];
66 Char progName
[BZ_MAX_FILENAME
];
68 MaybeUInt64 bytesOut
= 0;
69 MaybeUInt64 bytesIn
= 0;
72 /*---------------------------------------------------*/
73 /*--- Header bytes ---*/
74 /*---------------------------------------------------*/
76 #define BZ_HDR_B 0x42 /* 'B' */
77 #define BZ_HDR_Z 0x5a /* 'Z' */
78 #define BZ_HDR_h 0x68 /* 'h' */
79 #define BZ_HDR_0 0x30 /* '0' */
82 /*---------------------------------------------------*/
83 /*--- I/O errors ---*/
84 /*---------------------------------------------------*/
86 /*---------------------------------------------*/
87 static void readError ( void )
90 "%s: I/O error reading `%s', possible reason follows.\n",
91 progName
, inFileName
);
93 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
99 /*---------------------------------------------*/
100 static void writeError ( void )
103 "%s: I/O error reading `%s', possible reason follows.\n",
104 progName
, inFileName
);
106 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
112 /*---------------------------------------------*/
113 static void mallocFail ( Int32 n
)
116 "%s: malloc failed on request for %d bytes.\n",
118 fprintf ( stderr
, "%s: warning: output file(s) may be incomplete.\n",
124 /*---------------------------------------------*/
125 static void tooManyBlocks ( Int32 max_handled_blocks
)
128 "%s: `%s' appears to contain more than %d blocks\n",
129 progName
, inFileName
, max_handled_blocks
);
131 "%s: and cannot be handled. To fix, increase\n",
134 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
141 /*---------------------------------------------------*/
142 /*--- Bit stream I/O ---*/
143 /*---------------------------------------------------*/
155 /*---------------------------------------------*/
156 static BitStream
* bsOpenReadStream ( FILE* stream
)
158 BitStream
*bs
= malloc ( sizeof(BitStream
) );
159 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
168 /*---------------------------------------------*/
169 static BitStream
* bsOpenWriteStream ( FILE* stream
)
171 BitStream
*bs
= malloc ( sizeof(BitStream
) );
172 if (bs
== NULL
) mallocFail ( sizeof(BitStream
) );
181 /*---------------------------------------------*/
182 static void bsPutBit ( BitStream
* bs
, Int32 bit
)
184 if (bs
->buffLive
== 8) {
185 Int32 retVal
= putc ( (UChar
) bs
->buffer
, bs
->handle
);
186 if (retVal
== EOF
) writeError();
189 bs
->buffer
= bit
& 0x1;
191 bs
->buffer
= ( (bs
->buffer
<< 1) | (bit
& 0x1) );
197 /*---------------------------------------------*/
199 Returns 0 or 1, or 2 to indicate EOF.
201 static Int32
bsGetBit ( BitStream
* bs
)
203 if (bs
->buffLive
> 0) {
205 return ( ((bs
->buffer
) >> (bs
->buffLive
)) & 0x1 );
207 Int32 retVal
= getc ( bs
->handle
);
208 if ( retVal
== EOF
) {
209 if (errno
!= 0) readError();
214 return ( ((bs
->buffer
) >> 7) & 0x1 );
219 /*---------------------------------------------*/
220 static void bsClose ( BitStream
* bs
)
224 if ( bs
->mode
== 'w' ) {
225 while ( bs
->buffLive
< 8 ) {
229 retVal
= putc ( (UChar
) (bs
->buffer
), bs
->handle
);
230 if (retVal
== EOF
) writeError();
232 retVal
= fflush ( bs
->handle
);
233 if (retVal
== EOF
) writeError();
235 retVal
= fclose ( bs
->handle
);
237 if (bs
->mode
== 'w') writeError(); else readError();
243 /*---------------------------------------------*/
244 static void bsPutUChar ( BitStream
* bs
, UChar c
)
247 for (i
= 7; i
>= 0; i
--)
248 bsPutBit ( bs
, (((UInt32
) c
) >> i
) & 0x1 );
252 /*---------------------------------------------*/
253 static void bsPutUInt32 ( BitStream
* bs
, UInt32 c
)
257 for (i
= 31; i
>= 0; i
--)
258 bsPutBit ( bs
, (c
>> i
) & 0x1 );
262 /*---------------------------------------------*/
263 static Bool
endsInBz2 ( Char
* name
)
265 Int32 n
= strlen ( name
);
266 if (n
<= 4) return False
;
274 /*---------------------------------------------*/
275 /* Open an output file safely with O_EXCL and good permissions */
276 FILE* fopen_output( Char
* name
, const char* mode
)
281 fh
= open(name
, O_WRONLY
|O_CREAT
|O_EXCL
, 0600);
282 if (fh
== -1) return NULL
;
283 fp
= fdopen(fh
, mode
);
284 if (fp
== NULL
) close(fh
);
288 /*---------------------------------------------------*/
290 /*---------------------------------------------------*/
292 /* This logic isn't really right when it comes to Cygwin. */
294 # define BZ_SPLIT_SYM '\\' /* path splitter on Windows platform */
296 # define BZ_SPLIT_SYM '/' /* path splitter on Unix platform */
299 #define BLOCK_HEADER_HI 0x00003141UL
300 #define BLOCK_HEADER_LO 0x59265359UL
302 #define BLOCK_ENDMARK_HI 0x00001772UL
303 #define BLOCK_ENDMARK_LO 0x45385090UL
305 /* Increase if necessary. However, a .bz2 file with > 50000 blocks
306 would have an uncompressed size of at least 40GB, so the chances
307 are low you'll need to up this.
309 #define BZ_MAX_HANDLED_BLOCKS 50000
311 MaybeUInt64 bStart
[BZ_MAX_HANDLED_BLOCKS
];
312 MaybeUInt64 bEnd
[BZ_MAX_HANDLED_BLOCKS
];
313 MaybeUInt64 rbStart
[BZ_MAX_HANDLED_BLOCKS
];
314 MaybeUInt64 rbEnd
[BZ_MAX_HANDLED_BLOCKS
];
316 Int32
main ( Int32 argc
, Char
** argv
)
320 BitStream
* bsIn
, *bsWr
;
321 Int32 b
, wrBlock
, currBlock
, rbCtr
;
322 MaybeUInt64 bitsRead
;
324 UInt32 buffHi
, buffLo
, blockCRC
;
327 strcpy ( progName
, argv
[0] );
328 inFileName
[0] = outFileName
[0] = 0;
331 "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );
334 fprintf ( stderr
, "%s: usage is `%s damaged_file_name'.\n",
335 progName
, progName
);
336 switch (sizeof(MaybeUInt64
)) {
339 "\trestrictions on size of recovered file: None\n");
343 "\trestrictions on size of recovered file: 512 MB\n");
345 "\tto circumvent, recompile with MaybeUInt64 as an\n"
346 "\tunsigned 64-bit int.\n");
350 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
351 "configuration error.\n");
357 if (strlen(argv
[1]) >= BZ_MAX_FILENAME
-20) {
359 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
360 progName
, (int)strlen(argv
[1]) );
364 strcpy ( inFileName
, argv
[1] );
366 inFile
= fopen ( inFileName
, "rb" );
367 if (inFile
== NULL
) {
368 fprintf ( stderr
, "%s: can't read `%s'\n", progName
, inFileName
);
372 bsIn
= bsOpenReadStream ( inFile
);
373 fprintf ( stderr
, "%s: searching for block boundaries ...\n", progName
);
378 bStart
[currBlock
] = 0;
383 b
= bsGetBit ( bsIn
);
386 if (bitsRead
>= bStart
[currBlock
] &&
387 (bitsRead
- bStart
[currBlock
]) >= 40) {
388 bEnd
[currBlock
] = bitsRead
-1;
390 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
391 " to " MaybeUInt64_FMT
" (incomplete)\n",
392 currBlock
, bStart
[currBlock
], bEnd
[currBlock
] );
397 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
398 buffLo
= (buffLo
<< 1) | (b
& 1);
399 if ( ( (buffHi
& 0x0000ffff) == BLOCK_HEADER_HI
400 && buffLo
== BLOCK_HEADER_LO
)
402 ( (buffHi
& 0x0000ffff) == BLOCK_ENDMARK_HI
403 && buffLo
== BLOCK_ENDMARK_LO
)
406 bEnd
[currBlock
] = bitsRead
-49;
411 (bEnd
[currBlock
] - bStart
[currBlock
]) >= 130) {
412 fprintf ( stderr
, " block %d runs from " MaybeUInt64_FMT
413 " to " MaybeUInt64_FMT
"\n",
414 rbCtr
+1, bStart
[currBlock
], bEnd
[currBlock
] );
415 rbStart
[rbCtr
] = bStart
[currBlock
];
416 rbEnd
[rbCtr
] = bEnd
[currBlock
];
419 if (currBlock
>= BZ_MAX_HANDLED_BLOCKS
)
420 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS
);
423 bStart
[currBlock
] = bitsRead
;
429 /*-- identified blocks run from 1 to rbCtr inclusive. --*/
433 "%s: sorry, I couldn't find any block boundaries.\n",
438 fprintf ( stderr
, "%s: splitting into blocks\n", progName
);
440 inFile
= fopen ( inFileName
, "rb" );
441 if (inFile
== NULL
) {
442 fprintf ( stderr
, "%s: can't open `%s'\n", progName
, inFileName
);
445 bsIn
= bsOpenReadStream ( inFile
);
447 /*-- placate gcc's dataflow analyser --*/
448 blockCRC
= 0; bsWr
= 0;
456 buffHi
= (buffHi
<< 1) | (buffLo
>> 31);
457 buffLo
= (buffLo
<< 1) | (b
& 1);
458 if (bitsRead
== 47+rbStart
[wrBlock
])
459 blockCRC
= (buffHi
<< 16) | (buffLo
>> 16);
461 if (outFile
!= NULL
&& bitsRead
>= rbStart
[wrBlock
]
462 && bitsRead
<= rbEnd
[wrBlock
]) {
463 bsPutBit ( bsWr
, b
);
468 if (bitsRead
== rbEnd
[wrBlock
]+1) {
469 if (outFile
!= NULL
) {
470 bsPutUChar ( bsWr
, 0x17 ); bsPutUChar ( bsWr
, 0x72 );
471 bsPutUChar ( bsWr
, 0x45 ); bsPutUChar ( bsWr
, 0x38 );
472 bsPutUChar ( bsWr
, 0x50 ); bsPutUChar ( bsWr
, 0x90 );
473 bsPutUInt32 ( bsWr
, blockCRC
);
476 if (wrBlock
>= rbCtr
) break;
479 if (bitsRead
== rbStart
[wrBlock
]) {
480 /* Create the output file name, correctly handling leading paths.
481 (31.10.2001 by Sergey E. Kusikov) */
484 for (k
= 0; k
< BZ_MAX_FILENAME
; k
++)
486 strcpy (outFileName
, inFileName
);
487 split
= strrchr (outFileName
, BZ_SPLIT_SYM
);
493 /* Now split points to the start of the basename. */
494 ofs
= split
- outFileName
;
495 sprintf (split
, "rec%5d", wrBlock
+1);
496 for (p
= split
; *p
!= 0; p
++) if (*p
== ' ') *p
= '0';
497 strcat (outFileName
, inFileName
+ ofs
);
499 if ( !endsInBz2(outFileName
)) strcat ( outFileName
, ".bz2" );
501 fprintf ( stderr
, " writing block %d to `%s' ...\n",
502 wrBlock
+1, outFileName
);
504 outFile
= fopen_output ( outFileName
, "wb" );
505 if (outFile
== NULL
) {
506 fprintf ( stderr
, "%s: can't write `%s'\n",
507 progName
, outFileName
);
510 bsWr
= bsOpenWriteStream ( outFile
);
511 bsPutUChar ( bsWr
, BZ_HDR_B
);
512 bsPutUChar ( bsWr
, BZ_HDR_Z
);
513 bsPutUChar ( bsWr
, BZ_HDR_h
);
514 bsPutUChar ( bsWr
, BZ_HDR_0
+ 9 );
515 bsPutUChar ( bsWr
, 0x31 ); bsPutUChar ( bsWr
, 0x41 );
516 bsPutUChar ( bsWr
, 0x59 ); bsPutUChar ( bsWr
, 0x26 );
517 bsPutUChar ( bsWr
, 0x53 ); bsPutUChar ( bsWr
, 0x59 );
521 fprintf ( stderr
, "%s: finished\n", progName
);
527 /*-----------------------------------------------------------*/
528 /*--- end bzip2recover.c ---*/
529 /*-----------------------------------------------------------*/