(Debian) added bzexe
[mirror-ossqm-bzip2.git] / bzip2recover.c
blob485c6a01491cd6437b08e2f776fe073c99ba6bdd
1 /*-----------------------------------------------------------*/
2 /*--- Block recoverer program for bzip2 ---*/
3 /*--- bzip2recover.c ---*/
4 /*-----------------------------------------------------------*/
6 /* ------------------------------------------------------------------
7 This file is part of bzip2/libbzip2, a program and library for
8 lossless, block-sorting data compression.
10 bzip2/libbzip2 version 1.0.5 of 10 December 2007
11 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13 Please read the WARNING, DISCLAIMER and PATENTS sections in the
14 README file.
16 This program is released under the terms of the license contained
17 in the file LICENSE.
18 ------------------------------------------------------------------ */
20 /* This program is a complete hack and should be rewritten properly.
21 It isn't very complicated. */
23 #include <stdio.h>
24 #include <errno.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <fcntl.h>
28 #include <unistd.h>
/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.

   MaybeUInt64_FMT is the printf conversion that matches MaybeUInt64.
   The GNU branch uses "%llu": in ISO C the `L' length modifier is
   only defined for floating-point conversions, so "%Lu" depended on
   a library extension.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%llu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif
/* Scalar aliases used throughout this file. */
typedef  unsigned int   UInt32;   /* unsigned word used for bit buffers */
typedef  int            Int32;    /* general-purpose signed integer     */
typedef  unsigned char  UChar;    /* one raw byte                       */
typedef  char           Char;     /* character of a C string            */
typedef  unsigned char  Bool;     /* truth value, see True / False      */

/* The only two values a Bool is meant to hold. */
#define True    ((Bool)1)
#define False   ((Bool)0)
62 #define BZ_MAX_FILENAME 2000
64 Char inFileName[BZ_MAX_FILENAME];
65 Char outFileName[BZ_MAX_FILENAME];
66 Char progName[BZ_MAX_FILENAME];
68 MaybeUInt64 bytesOut = 0;
69 MaybeUInt64 bytesIn = 0;
72 /*---------------------------------------------------*/
73 /*--- Header bytes ---*/
74 /*---------------------------------------------------*/
/* Byte values of the stream header "BZh" followed by a level digit;
   the digit byte is formed as BZ_HDR_0 plus the level. */
#define BZ_HDR_B   0x42   /* 'B' */
#define BZ_HDR_Z   0x5a   /* 'Z' */
#define BZ_HDR_h   0x68   /* 'h' */
#define BZ_HDR_0   0x30   /* '0' */
82 /*---------------------------------------------------*/
83 /*--- I/O errors ---*/
84 /*---------------------------------------------------*/
86 /*---------------------------------------------*/
87 static void readError ( void )
89 fprintf ( stderr,
90 "%s: I/O error reading `%s', possible reason follows.\n",
91 progName, inFileName );
92 perror ( progName );
93 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
94 progName );
95 exit ( 1 );
99 /*---------------------------------------------*/
100 static void writeError ( void )
102 fprintf ( stderr,
103 "%s: I/O error reading `%s', possible reason follows.\n",
104 progName, inFileName );
105 perror ( progName );
106 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
107 progName );
108 exit ( 1 );
112 /*---------------------------------------------*/
113 static void mallocFail ( Int32 n )
115 fprintf ( stderr,
116 "%s: malloc failed on request for %d bytes.\n",
117 progName, n );
118 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
119 progName );
120 exit ( 1 );
124 /*---------------------------------------------*/
125 static void tooManyBlocks ( Int32 max_handled_blocks )
127 fprintf ( stderr,
128 "%s: `%s' appears to contain more than %d blocks\n",
129 progName, inFileName, max_handled_blocks );
130 fprintf ( stderr,
131 "%s: and cannot be handled. To fix, increase\n",
132 progName );
133 fprintf ( stderr,
134 "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
135 progName );
136 exit ( 1 );
141 /*---------------------------------------------------*/
142 /*--- Bit stream I/O ---*/
143 /*---------------------------------------------------*/
145 typedef
146 struct {
147 FILE* handle;
148 Int32 buffer;
149 Int32 buffLive;
150 Char mode;
152 BitStream;
155 /*---------------------------------------------*/
156 static BitStream* bsOpenReadStream ( FILE* stream )
158 BitStream *bs = malloc ( sizeof(BitStream) );
159 if (bs == NULL) mallocFail ( sizeof(BitStream) );
160 bs->handle = stream;
161 bs->buffer = 0;
162 bs->buffLive = 0;
163 bs->mode = 'r';
164 return bs;
168 /*---------------------------------------------*/
169 static BitStream* bsOpenWriteStream ( FILE* stream )
171 BitStream *bs = malloc ( sizeof(BitStream) );
172 if (bs == NULL) mallocFail ( sizeof(BitStream) );
173 bs->handle = stream;
174 bs->buffer = 0;
175 bs->buffLive = 0;
176 bs->mode = 'w';
177 return bs;
181 /*---------------------------------------------*/
182 static void bsPutBit ( BitStream* bs, Int32 bit )
184 if (bs->buffLive == 8) {
185 Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
186 if (retVal == EOF) writeError();
187 bytesOut++;
188 bs->buffLive = 1;
189 bs->buffer = bit & 0x1;
190 } else {
191 bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
192 bs->buffLive++;
197 /*---------------------------------------------*/
198 /*--
199 Returns 0 or 1, or 2 to indicate EOF.
200 --*/
201 static Int32 bsGetBit ( BitStream* bs )
203 if (bs->buffLive > 0) {
204 bs->buffLive --;
205 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
206 } else {
207 Int32 retVal = getc ( bs->handle );
208 if ( retVal == EOF ) {
209 if (errno != 0) readError();
210 return 2;
212 bs->buffLive = 7;
213 bs->buffer = retVal;
214 return ( ((bs->buffer) >> 7) & 0x1 );
219 /*---------------------------------------------*/
220 static void bsClose ( BitStream* bs )
222 Int32 retVal;
224 if ( bs->mode == 'w' ) {
225 while ( bs->buffLive < 8 ) {
226 bs->buffLive++;
227 bs->buffer <<= 1;
229 retVal = putc ( (UChar) (bs->buffer), bs->handle );
230 if (retVal == EOF) writeError();
231 bytesOut++;
232 retVal = fflush ( bs->handle );
233 if (retVal == EOF) writeError();
235 retVal = fclose ( bs->handle );
236 if (retVal == EOF) {
237 if (bs->mode == 'w') writeError(); else readError();
239 free ( bs );
243 /*---------------------------------------------*/
244 static void bsPutUChar ( BitStream* bs, UChar c )
246 Int32 i;
247 for (i = 7; i >= 0; i--)
248 bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
252 /*---------------------------------------------*/
253 static void bsPutUInt32 ( BitStream* bs, UInt32 c )
255 Int32 i;
257 for (i = 31; i >= 0; i--)
258 bsPutBit ( bs, (c >> i) & 0x1 );
262 /*---------------------------------------------*/
263 static Bool endsInBz2 ( Char* name )
265 Int32 n = strlen ( name );
266 if (n <= 4) return False;
267 return
268 (name[n-4] == '.' &&
269 name[n-3] == 'b' &&
270 name[n-2] == 'z' &&
271 name[n-1] == '2');
274 /*---------------------------------------------*/
275 /* Open an output file safely with O_EXCL and good permissions */
276 FILE* fopen_output( Char* name, const char* mode )
278 FILE *fp;
279 int fh;
281 fh = open(name, O_WRONLY|O_CREAT|O_EXCL, 0600);
282 if (fh == -1) return NULL;
283 fp = fdopen(fh, mode);
284 if (fp == NULL) close(fh);
285 return fp;
288 /*---------------------------------------------------*/
289 /*--- ---*/
290 /*---------------------------------------------------*/
/* Character separating directory components in a path.
   This logic isn't really right when it comes to Cygwin. */
#ifdef _WIN32
#  define BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

/* 48-bit marker that main() searches for as the start of a block,
   split into its upper 16 bits and lower 32 bits. */
#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

/* 48-bit marker that main() searches for as the end of the stream,
   split the same way. */
#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL
305 /* Increase if necessary. However, a .bz2 file with > 50000 blocks
306 would have an uncompressed size of at least 40GB, so the chances
307 are low you'll need to up this.
309 #define BZ_MAX_HANDLED_BLOCKS 50000
311 MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
312 MaybeUInt64 bEnd [BZ_MAX_HANDLED_BLOCKS];
313 MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
314 MaybeUInt64 rbEnd [BZ_MAX_HANDLED_BLOCKS];
316 Int32 main ( Int32 argc, Char** argv )
318 FILE* inFile;
319 FILE* outFile;
320 BitStream* bsIn, *bsWr;
321 Int32 b, wrBlock, currBlock, rbCtr;
322 MaybeUInt64 bitsRead;
324 UInt32 buffHi, buffLo, blockCRC;
325 Char* p;
327 strcpy ( progName, argv[0] );
328 inFileName[0] = outFileName[0] = 0;
330 fprintf ( stderr,
331 "bzip2recover 1.0.5: extracts blocks from damaged .bz2 files.\n" );
333 if (argc != 2) {
334 fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
335 progName, progName );
336 switch (sizeof(MaybeUInt64)) {
337 case 8:
338 fprintf(stderr,
339 "\trestrictions on size of recovered file: None\n");
340 break;
341 case 4:
342 fprintf(stderr,
343 "\trestrictions on size of recovered file: 512 MB\n");
344 fprintf(stderr,
345 "\tto circumvent, recompile with MaybeUInt64 as an\n"
346 "\tunsigned 64-bit int.\n");
347 break;
348 default:
349 fprintf(stderr,
350 "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
351 "configuration error.\n");
352 break;
354 exit(1);
357 if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
358 fprintf ( stderr,
359 "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
360 progName, (int)strlen(argv[1]) );
361 exit(1);
364 strcpy ( inFileName, argv[1] );
366 inFile = fopen ( inFileName, "rb" );
367 if (inFile == NULL) {
368 fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
369 exit(1);
372 bsIn = bsOpenReadStream ( inFile );
373 fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );
375 bitsRead = 0;
376 buffHi = buffLo = 0;
377 currBlock = 0;
378 bStart[currBlock] = 0;
380 rbCtr = 0;
382 while (True) {
383 b = bsGetBit ( bsIn );
384 bitsRead++;
385 if (b == 2) {
386 if (bitsRead >= bStart[currBlock] &&
387 (bitsRead - bStart[currBlock]) >= 40) {
388 bEnd[currBlock] = bitsRead-1;
389 if (currBlock > 0)
390 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
391 " to " MaybeUInt64_FMT " (incomplete)\n",
392 currBlock, bStart[currBlock], bEnd[currBlock] );
393 } else
394 currBlock--;
395 break;
397 buffHi = (buffHi << 1) | (buffLo >> 31);
398 buffLo = (buffLo << 1) | (b & 1);
399 if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
400 && buffLo == BLOCK_HEADER_LO)
402 ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
403 && buffLo == BLOCK_ENDMARK_LO)
405 if (bitsRead > 49) {
406 bEnd[currBlock] = bitsRead-49;
407 } else {
408 bEnd[currBlock] = 0;
410 if (currBlock > 0 &&
411 (bEnd[currBlock] - bStart[currBlock]) >= 130) {
412 fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
413 " to " MaybeUInt64_FMT "\n",
414 rbCtr+1, bStart[currBlock], bEnd[currBlock] );
415 rbStart[rbCtr] = bStart[currBlock];
416 rbEnd[rbCtr] = bEnd[currBlock];
417 rbCtr++;
419 if (currBlock >= BZ_MAX_HANDLED_BLOCKS)
420 tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
421 currBlock++;
423 bStart[currBlock] = bitsRead;
427 bsClose ( bsIn );
429 /*-- identified blocks run from 1 to rbCtr inclusive. --*/
431 if (rbCtr < 1) {
432 fprintf ( stderr,
433 "%s: sorry, I couldn't find any block boundaries.\n",
434 progName );
435 exit(1);
438 fprintf ( stderr, "%s: splitting into blocks\n", progName );
440 inFile = fopen ( inFileName, "rb" );
441 if (inFile == NULL) {
442 fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
443 exit(1);
445 bsIn = bsOpenReadStream ( inFile );
447 /*-- placate gcc's dataflow analyser --*/
448 blockCRC = 0; bsWr = 0;
450 bitsRead = 0;
451 outFile = NULL;
452 wrBlock = 0;
453 while (True) {
454 b = bsGetBit(bsIn);
455 if (b == 2) break;
456 buffHi = (buffHi << 1) | (buffLo >> 31);
457 buffLo = (buffLo << 1) | (b & 1);
458 if (bitsRead == 47+rbStart[wrBlock])
459 blockCRC = (buffHi << 16) | (buffLo >> 16);
461 if (outFile != NULL && bitsRead >= rbStart[wrBlock]
462 && bitsRead <= rbEnd[wrBlock]) {
463 bsPutBit ( bsWr, b );
466 bitsRead++;
468 if (bitsRead == rbEnd[wrBlock]+1) {
469 if (outFile != NULL) {
470 bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
471 bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
472 bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
473 bsPutUInt32 ( bsWr, blockCRC );
474 bsClose ( bsWr );
476 if (wrBlock >= rbCtr) break;
477 wrBlock++;
478 } else
479 if (bitsRead == rbStart[wrBlock]) {
480 /* Create the output file name, correctly handling leading paths.
481 (31.10.2001 by Sergey E. Kusikov) */
482 Char* split;
483 Int32 ofs, k;
484 for (k = 0; k < BZ_MAX_FILENAME; k++)
485 outFileName[k] = 0;
486 strcpy (outFileName, inFileName);
487 split = strrchr (outFileName, BZ_SPLIT_SYM);
488 if (split == NULL) {
489 split = outFileName;
490 } else {
491 ++split;
493 /* Now split points to the start of the basename. */
494 ofs = split - outFileName;
495 sprintf (split, "rec%5d", wrBlock+1);
496 for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
497 strcat (outFileName, inFileName + ofs);
499 if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );
501 fprintf ( stderr, " writing block %d to `%s' ...\n",
502 wrBlock+1, outFileName );
504 outFile = fopen_output ( outFileName, "wb" );
505 if (outFile == NULL) {
506 fprintf ( stderr, "%s: can't write `%s'\n",
507 progName, outFileName );
508 exit(1);
510 bsWr = bsOpenWriteStream ( outFile );
511 bsPutUChar ( bsWr, BZ_HDR_B );
512 bsPutUChar ( bsWr, BZ_HDR_Z );
513 bsPutUChar ( bsWr, BZ_HDR_h );
514 bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
515 bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
516 bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
517 bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
521 fprintf ( stderr, "%s: finished\n", progName );
522 return 0;
527 /*-----------------------------------------------------------*/
528 /*--- end bzip2recover.c ---*/
529 /*-----------------------------------------------------------*/