2 /*-------------------------------------------------------------*/
3 /*--- Decompression machinery ---*/
4 /*--- decompress.c ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
22 #include "bzlib_private.h"
25 /*---------------------------------------------------*/
27 void makeMaps_d ( DState
* s
)
31 for (i
= 0; i
< 256; i
++)
33 s
->seqToUnseq
[s
->nInUse
] = i
;
39 /*---------------------------------------------------*/
41 { retVal = rrr; goto save_state_and_return; }
43 #define GET_BITS(lll,vvv,nnn) \
45 case lll: s->state = lll; \
47 if (s->bsLive >= nnn) { \
50 (s->bsLive-nnn)) & ((1 << nnn)-1); \
55 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
57 = (s->bsBuff << 8) | \
59 (*((UChar*)(s->strm->next_in)))); \
62 s->strm->avail_in--; \
63 s->strm->total_in_lo32++; \
64 if (s->strm->total_in_lo32 == 0) \
65 s->strm->total_in_hi32++; \
68 #define GET_UCHAR(lll,uuu) \
71 #define GET_BIT(lll,uuu) \
74 /*---------------------------------------------------*/
75 #define GET_MTF_VAL(label1,label2,lval) \
77 if (groupPos == 0) { \
79 if (groupNo >= nSelectors) \
80 RETURN(BZ_DATA_ERROR); \
81 groupPos = BZ_G_SIZE; \
82 gSel = s->selector[groupNo]; \
83 gMinlen = s->minLens[gSel]; \
84 gLimit = &(s->limit[gSel][0]); \
85 gPerm = &(s->perm[gSel][0]); \
86 gBase = &(s->base[gSel][0]); \
90 GET_BITS(label1, zvec, zn); \
92 if (zn > 20 /* the longest code */) \
93 RETURN(BZ_DATA_ERROR); \
94 if (zvec <= gLimit[zn]) break; \
96 GET_BIT(label2, zj); \
97 zvec = (zvec << 1) | zj; \
99 if (zvec - gBase[zn] < 0 \
100 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
101 RETURN(BZ_DATA_ERROR); \
102 lval = gPerm[zvec - gBase[zn]]; \
106 /*---------------------------------------------------*/
107 Int32
BZ2_decompress ( DState
* s
)
111 Int32 minLen
, maxLen
;
112 bz_stream
* strm
= s
->strm
;
114 /* stuff that needs to be saved/restored */
140 if (s
->state
== BZ_X_MAGIC_1
) {
141 /*initialise the save area*/
145 s
->save_alphaSize
= 0;
147 s
->save_nSelectors
= 0;
150 s
->save_groupPos
= 0;
152 s
->save_nblockMAX
= 0;
163 s
->save_gLimit
= NULL
;
164 s
->save_gBase
= NULL
;
165 s
->save_gPerm
= NULL
;
168 /*restore from the save area*/
172 alphaSize
= s
->save_alphaSize
;
173 nGroups
= s
->save_nGroups
;
174 nSelectors
= s
->save_nSelectors
;
176 groupNo
= s
->save_groupNo
;
177 groupPos
= s
->save_groupPos
;
178 nextSym
= s
->save_nextSym
;
179 nblockMAX
= s
->save_nblockMAX
;
180 nblock
= s
->save_nblock
;
189 gMinlen
= s
->save_gMinlen
;
190 gLimit
= s
->save_gLimit
;
191 gBase
= s
->save_gBase
;
192 gPerm
= s
->save_gPerm
;
198 GET_UCHAR(BZ_X_MAGIC_1
, uc
);
199 if (uc
!= BZ_HDR_B
) RETURN(BZ_DATA_ERROR_MAGIC
);
201 GET_UCHAR(BZ_X_MAGIC_2
, uc
);
202 if (uc
!= BZ_HDR_Z
) RETURN(BZ_DATA_ERROR_MAGIC
);
204 GET_UCHAR(BZ_X_MAGIC_3
, uc
)
205 if (uc
!= BZ_HDR_h
) RETURN(BZ_DATA_ERROR_MAGIC
);
207 GET_BITS(BZ_X_MAGIC_4
, s
->blockSize100k
, 8)
208 if (s
->blockSize100k
< (BZ_HDR_0
+ 1) ||
209 s
->blockSize100k
> (BZ_HDR_0
+ 9)) RETURN(BZ_DATA_ERROR_MAGIC
);
210 s
->blockSize100k
-= BZ_HDR_0
;
212 if (s
->smallDecompress
) {
213 s
->ll16
= BZALLOC( s
->blockSize100k
* 100000 * sizeof(UInt16
) );
215 ((1 + s
->blockSize100k
* 100000) >> 1) * sizeof(UChar
)
217 if (s
->ll16
== NULL
|| s
->ll4
== NULL
) RETURN(BZ_MEM_ERROR
);
219 s
->tt
= BZALLOC( s
->blockSize100k
* 100000 * sizeof(Int32
) );
220 if (s
->tt
== NULL
) RETURN(BZ_MEM_ERROR
);
223 GET_UCHAR(BZ_X_BLKHDR_1
, uc
);
225 if (uc
== 0x17) goto endhdr_2
;
226 if (uc
!= 0x31) RETURN(BZ_DATA_ERROR
);
227 GET_UCHAR(BZ_X_BLKHDR_2
, uc
);
228 if (uc
!= 0x41) RETURN(BZ_DATA_ERROR
);
229 GET_UCHAR(BZ_X_BLKHDR_3
, uc
);
230 if (uc
!= 0x59) RETURN(BZ_DATA_ERROR
);
231 GET_UCHAR(BZ_X_BLKHDR_4
, uc
);
232 if (uc
!= 0x26) RETURN(BZ_DATA_ERROR
);
233 GET_UCHAR(BZ_X_BLKHDR_5
, uc
);
234 if (uc
!= 0x53) RETURN(BZ_DATA_ERROR
);
235 GET_UCHAR(BZ_X_BLKHDR_6
, uc
);
236 if (uc
!= 0x59) RETURN(BZ_DATA_ERROR
);
239 if (s
->verbosity
>= 2)
240 VPrintf1 ( "\n [%d: huff+mtf ", s
->currBlockNo
);
242 s
->storedBlockCRC
= 0;
243 GET_UCHAR(BZ_X_BCRC_1
, uc
);
244 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
245 GET_UCHAR(BZ_X_BCRC_2
, uc
);
246 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
247 GET_UCHAR(BZ_X_BCRC_3
, uc
);
248 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
249 GET_UCHAR(BZ_X_BCRC_4
, uc
);
250 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
252 GET_BITS(BZ_X_RANDBIT
, s
->blockRandomised
, 1);
255 GET_UCHAR(BZ_X_ORIGPTR_1
, uc
);
256 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
257 GET_UCHAR(BZ_X_ORIGPTR_2
, uc
);
258 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
259 GET_UCHAR(BZ_X_ORIGPTR_3
, uc
);
260 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
263 RETURN(BZ_DATA_ERROR
);
264 if (s
->origPtr
> 10 + 100000*s
->blockSize100k
)
265 RETURN(BZ_DATA_ERROR
);
267 /*--- Receive the mapping table ---*/
268 for (i
= 0; i
< 16; i
++) {
269 GET_BIT(BZ_X_MAPPING_1
, uc
);
271 s
->inUse16
[i
] = True
; else
272 s
->inUse16
[i
] = False
;
275 for (i
= 0; i
< 256; i
++) s
->inUse
[i
] = False
;
277 for (i
= 0; i
< 16; i
++)
279 for (j
= 0; j
< 16; j
++) {
280 GET_BIT(BZ_X_MAPPING_2
, uc
);
281 if (uc
== 1) s
->inUse
[i
* 16 + j
] = True
;
284 if (s
->nInUse
== 0) RETURN(BZ_DATA_ERROR
);
285 alphaSize
= s
->nInUse
+2;
287 /*--- Now the selectors ---*/
288 GET_BITS(BZ_X_SELECTOR_1
, nGroups
, 3);
289 if (nGroups
< 2 || nGroups
> 6) RETURN(BZ_DATA_ERROR
);
290 GET_BITS(BZ_X_SELECTOR_2
, nSelectors
, 15);
291 if (nSelectors
< 1) RETURN(BZ_DATA_ERROR
);
292 for (i
= 0; i
< nSelectors
; i
++) {
295 GET_BIT(BZ_X_SELECTOR_3
, uc
);
298 if (j
>= nGroups
) RETURN(BZ_DATA_ERROR
);
300 s
->selectorMtf
[i
] = j
;
303 /*--- Undo the MTF values for the selectors. ---*/
305 UChar pos
[BZ_N_GROUPS
], tmp
, v
;
306 for (v
= 0; v
< nGroups
; v
++) pos
[v
] = v
;
308 for (i
= 0; i
< nSelectors
; i
++) {
309 v
= s
->selectorMtf
[i
];
311 while (v
> 0) { pos
[v
] = pos
[v
-1]; v
--; }
313 s
->selector
[i
] = tmp
;
317 /*--- Now the coding tables ---*/
318 for (t
= 0; t
< nGroups
; t
++) {
319 GET_BITS(BZ_X_CODING_1
, curr
, 5);
320 for (i
= 0; i
< alphaSize
; i
++) {
322 if (curr
< 1 || curr
> 20) RETURN(BZ_DATA_ERROR
);
323 GET_BIT(BZ_X_CODING_2
, uc
);
325 GET_BIT(BZ_X_CODING_3
, uc
);
326 if (uc
== 0) curr
++; else curr
--;
332 /*--- Create the Huffman decoding tables ---*/
333 for (t
= 0; t
< nGroups
; t
++) {
336 for (i
= 0; i
< alphaSize
; i
++) {
337 if (s
->len
[t
][i
] > maxLen
) maxLen
= s
->len
[t
][i
];
338 if (s
->len
[t
][i
] < minLen
) minLen
= s
->len
[t
][i
];
340 BZ2_hbCreateDecodeTables (
345 minLen
, maxLen
, alphaSize
347 s
->minLens
[t
] = minLen
;
350 /*--- Now the MTF values ---*/
353 nblockMAX
= 100000 * s
->blockSize100k
;
357 for (i
= 0; i
<= 255; i
++) s
->unzftab
[i
] = 0;
363 for (ii
= 256 / MTFL_SIZE
- 1; ii
>= 0; ii
--) {
364 for (jj
= MTFL_SIZE
-1; jj
>= 0; jj
--) {
365 s
->mtfa
[kk
] = (UChar
)(ii
* MTFL_SIZE
+ jj
);
368 s
->mtfbase
[ii
] = kk
+ 1;
371 /*-- end MTF init --*/
374 GET_MTF_VAL(BZ_X_MTF_1
, BZ_X_MTF_2
, nextSym
);
378 if (nextSym
== EOB
) break;
380 if (nextSym
== BZ_RUNA
|| nextSym
== BZ_RUNB
) {
385 /* Check that N doesn't get too big, so that es doesn't
386 go negative. The maximum value that can be
387 RUNA/RUNB encoded is equal to the block size (post
388 the initial RLE), viz, 900k, so bounding N at 2
389 million should guard against overflow without
390 rejecting any legitimate inputs. */
391 if (N
>= 2*1024*1024) RETURN(BZ_DATA_ERROR
);
392 if (nextSym
== BZ_RUNA
) es
= es
+ (0+1) * N
; else
393 if (nextSym
== BZ_RUNB
) es
= es
+ (1+1) * N
;
395 GET_MTF_VAL(BZ_X_MTF_3
, BZ_X_MTF_4
, nextSym
);
397 while (nextSym
== BZ_RUNA
|| nextSym
== BZ_RUNB
);
400 uc
= s
->seqToUnseq
[ s
->mtfa
[s
->mtfbase
[0]] ];
401 s
->unzftab
[uc
] += es
;
403 if (s
->smallDecompress
)
405 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
406 s
->ll16
[nblock
] = (UInt16
)uc
;
412 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
413 s
->tt
[nblock
] = (UInt32
)uc
;
422 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
424 /*-- uc = MTF ( nextSym-1 ) --*/
426 Int32 ii
, jj
, kk
, pp
, lno
, off
;
428 nn
= (UInt32
)(nextSym
- 1);
430 if (nn
< MTFL_SIZE
) {
431 /* avoid general-case expense */
436 s
->mtfa
[(z
) ] = s
->mtfa
[(z
)-1];
437 s
->mtfa
[(z
)-1] = s
->mtfa
[(z
)-2];
438 s
->mtfa
[(z
)-2] = s
->mtfa
[(z
)-3];
439 s
->mtfa
[(z
)-3] = s
->mtfa
[(z
)-4];
443 s
->mtfa
[(pp
+nn
)] = s
->mtfa
[(pp
+nn
)-1]; nn
--;
448 lno
= nn
/ MTFL_SIZE
;
449 off
= nn
% MTFL_SIZE
;
450 pp
= s
->mtfbase
[lno
] + off
;
452 while (pp
> s
->mtfbase
[lno
]) {
453 s
->mtfa
[pp
] = s
->mtfa
[pp
-1]; pp
--;
458 s
->mtfa
[s
->mtfbase
[lno
]]
459 = s
->mtfa
[s
->mtfbase
[lno
-1] + MTFL_SIZE
- 1];
463 s
->mtfa
[s
->mtfbase
[0]] = uc
;
464 if (s
->mtfbase
[0] == 0) {
466 for (ii
= 256 / MTFL_SIZE
-1; ii
>= 0; ii
--) {
467 for (jj
= MTFL_SIZE
-1; jj
>= 0; jj
--) {
468 s
->mtfa
[kk
] = s
->mtfa
[s
->mtfbase
[ii
] + jj
];
471 s
->mtfbase
[ii
] = kk
+ 1;
476 /*-- end uc = MTF ( nextSym-1 ) --*/
478 s
->unzftab
[s
->seqToUnseq
[uc
]]++;
479 if (s
->smallDecompress
)
480 s
->ll16
[nblock
] = (UInt16
)(s
->seqToUnseq
[uc
]); else
481 s
->tt
[nblock
] = (UInt32
)(s
->seqToUnseq
[uc
]);
484 GET_MTF_VAL(BZ_X_MTF_5
, BZ_X_MTF_6
, nextSym
);
489 /* Now we know what nblock is, we can do a better sanity
492 if (s
->origPtr
< 0 || s
->origPtr
>= nblock
)
493 RETURN(BZ_DATA_ERROR
);
495 /*-- Set up cftab to facilitate generation of T^(-1) --*/
496 /* Check: unzftab entries in range. */
497 for (i
= 0; i
<= 255; i
++) {
498 if (s
->unzftab
[i
] < 0 || s
->unzftab
[i
] > nblock
)
499 RETURN(BZ_DATA_ERROR
);
501 /* Actually generate cftab. */
503 for (i
= 1; i
<= 256; i
++) s
->cftab
[i
] = s
->unzftab
[i
-1];
504 for (i
= 1; i
<= 256; i
++) s
->cftab
[i
] += s
->cftab
[i
-1];
505 /* Check: cftab entries in range. */
506 for (i
= 0; i
<= 256; i
++) {
507 if (s
->cftab
[i
] < 0 || s
->cftab
[i
] > nblock
) {
508 /* s->cftab[i] can legitimately be == nblock */
509 RETURN(BZ_DATA_ERROR
)
512 /* Check: cftab entries non-descending. */
513 for (i
= 1; i
<= 256; i
++) {
514 if (s
->cftab
[i
-1] > s
->cftab
[i
]) {
515 RETURN(BZ_DATA_ERROR
)
519 s
->state_out_len
= 0;
521 BZ_INITIALISE_CRC ( s
->calculatedBlockCRC
);
522 s
->state
= BZ_X_OUTPUT
;
523 if (s
->verbosity
>= 2) VPrintf0 ( "rt+rld" );
525 if (s
->smallDecompress
) {
527 /*-- Make a copy of cftab, used in generation of T --*/
528 for (i
= 0; i
<= 256; i
++) s
->cftabCopy
[i
] = s
->cftab
[i
];
530 /*-- compute the T vector --*/
531 for (i
= 0; i
< nblock
; i
++) {
532 uc
= (UChar
)(s
->ll16
[i
]);
533 SET_LL(i
, s
->cftabCopy
[uc
]);
537 /*-- Compute T^(-1) by pointer reversal on T --*/
541 Int32 tmp
= GET_LL(j
);
546 while (i
!= s
->origPtr
);
548 s
->tPos
= s
->origPtr
;
550 if (s
->blockRandomised
) {
554 BZ_RAND_UPD_MASK
; s
->k0
^= BZ_RAND_MASK
;
556 BZ_GET_SMALL(s
->k0
); s
->nblock_used
++;
561 /*-- compute the T^(-1) vector --*/
562 for (i
= 0; i
< nblock
; i
++) {
563 uc
= (UChar
)(s
->tt
[i
] & 0xff);
564 s
->tt
[s
->cftab
[uc
]] |= (i
<< 8);
568 s
->tPos
= s
->tt
[s
->origPtr
] >> 8;
570 if (s
->blockRandomised
) {
574 BZ_RAND_UPD_MASK
; s
->k0
^= BZ_RAND_MASK
;
576 BZ_GET_FAST(s
->k0
); s
->nblock_used
++;
587 GET_UCHAR(BZ_X_ENDHDR_2
, uc
);
588 if (uc
!= 0x72) RETURN(BZ_DATA_ERROR
);
589 GET_UCHAR(BZ_X_ENDHDR_3
, uc
);
590 if (uc
!= 0x45) RETURN(BZ_DATA_ERROR
);
591 GET_UCHAR(BZ_X_ENDHDR_4
, uc
);
592 if (uc
!= 0x38) RETURN(BZ_DATA_ERROR
);
593 GET_UCHAR(BZ_X_ENDHDR_5
, uc
);
594 if (uc
!= 0x50) RETURN(BZ_DATA_ERROR
);
595 GET_UCHAR(BZ_X_ENDHDR_6
, uc
);
596 if (uc
!= 0x90) RETURN(BZ_DATA_ERROR
);
598 s
->storedCombinedCRC
= 0;
599 GET_UCHAR(BZ_X_CCRC_1
, uc
);
600 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
601 GET_UCHAR(BZ_X_CCRC_2
, uc
);
602 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
603 GET_UCHAR(BZ_X_CCRC_3
, uc
);
604 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
605 GET_UCHAR(BZ_X_CCRC_4
, uc
);
606 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
608 s
->state
= BZ_X_IDLE
;
609 RETURN(BZ_STREAM_END
)
611 default: AssertH ( False
, 4001 );
614 AssertH ( False
, 4002 );
616 save_state_and_return
:
621 s
->save_alphaSize
= alphaSize
;
622 s
->save_nGroups
= nGroups
;
623 s
->save_nSelectors
= nSelectors
;
625 s
->save_groupNo
= groupNo
;
626 s
->save_groupPos
= groupPos
;
627 s
->save_nextSym
= nextSym
;
628 s
->save_nblockMAX
= nblockMAX
;
629 s
->save_nblock
= nblock
;
638 s
->save_gMinlen
= gMinlen
;
639 s
->save_gLimit
= gLimit
;
640 s
->save_gBase
= gBase
;
641 s
->save_gPerm
= gPerm
;
647 /*-------------------------------------------------------------*/
648 /*--- end decompress.c ---*/
649 /*-------------------------------------------------------------*/