2 /*-------------------------------------------------------------*/
3 /*--- Decompression machinery ---*/
4 /*--- decompress.c ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.8 of 13 July 2019
12 Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
22 #include "bzlib_private.h"
25 /*---------------------------------------------------*/
27 void makeMaps_d ( DState
* s
)
31 for (i
= 0; i
< 256; i
++)
33 s
->seqToUnseq
[s
->nInUse
] = i
;
39 /*---------------------------------------------------*/
41 { retVal = rrr; goto save_state_and_return; };
43 #define GET_BITS(lll,vvv,nnn) \
44 case lll: s->state = lll; \
46 if (s->bsLive >= nnn) { \
49 (s->bsLive-nnn)) & ((1 << nnn)-1); \
54 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
56 = (s->bsBuff << 8) | \
58 (*((UChar*)(s->strm->next_in)))); \
61 s->strm->avail_in--; \
62 s->strm->total_in_lo32++; \
63 if (s->strm->total_in_lo32 == 0) \
64 s->strm->total_in_hi32++; \
67 #define GET_UCHAR(lll,uuu) \
70 #define GET_BIT(lll,uuu) \
73 /*---------------------------------------------------*/
74 #define GET_MTF_VAL(label1,label2,lval) \
76 if (groupPos == 0) { \
78 if (groupNo >= nSelectors) \
79 RETURN(BZ_DATA_ERROR); \
80 groupPos = BZ_G_SIZE; \
81 gSel = s->selector[groupNo]; \
82 gMinlen = s->minLens[gSel]; \
83 gLimit = &(s->limit[gSel][0]); \
84 gPerm = &(s->perm[gSel][0]); \
85 gBase = &(s->base[gSel][0]); \
89 GET_BITS(label1, zvec, zn); \
91 if (zn > 20 /* the longest code */) \
92 RETURN(BZ_DATA_ERROR); \
93 if (zvec <= gLimit[zn]) break; \
95 GET_BIT(label2, zj); \
96 zvec = (zvec << 1) | zj; \
98 if (zvec - gBase[zn] < 0 \
99 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
100 RETURN(BZ_DATA_ERROR); \
101 lval = gPerm[zvec - gBase[zn]]; \
105 /*---------------------------------------------------*/
106 Int32
BZ2_decompress ( DState
* s
)
110 Int32 minLen
, maxLen
;
111 bz_stream
* strm
= s
->strm
;
113 /* stuff that needs to be saved/restored */
139 if (s
->state
== BZ_X_MAGIC_1
) {
140 /*initialise the save area*/
144 s
->save_alphaSize
= 0;
146 s
->save_nSelectors
= 0;
149 s
->save_groupPos
= 0;
151 s
->save_nblockMAX
= 0;
162 s
->save_gLimit
= NULL
;
163 s
->save_gBase
= NULL
;
164 s
->save_gPerm
= NULL
;
167 /*restore from the save area*/
171 alphaSize
= s
->save_alphaSize
;
172 nGroups
= s
->save_nGroups
;
173 nSelectors
= s
->save_nSelectors
;
175 groupNo
= s
->save_groupNo
;
176 groupPos
= s
->save_groupPos
;
177 nextSym
= s
->save_nextSym
;
178 nblockMAX
= s
->save_nblockMAX
;
179 nblock
= s
->save_nblock
;
188 gMinlen
= s
->save_gMinlen
;
189 gLimit
= s
->save_gLimit
;
190 gBase
= s
->save_gBase
;
191 gPerm
= s
->save_gPerm
;
197 GET_UCHAR(BZ_X_MAGIC_1
, uc
);
198 if (uc
!= BZ_HDR_B
) RETURN(BZ_DATA_ERROR_MAGIC
);
200 GET_UCHAR(BZ_X_MAGIC_2
, uc
);
201 if (uc
!= BZ_HDR_Z
) RETURN(BZ_DATA_ERROR_MAGIC
);
203 GET_UCHAR(BZ_X_MAGIC_3
, uc
)
204 if (uc
!= BZ_HDR_h
) RETURN(BZ_DATA_ERROR_MAGIC
);
206 GET_BITS(BZ_X_MAGIC_4
, s
->blockSize100k
, 8)
207 if (s
->blockSize100k
< (BZ_HDR_0
+ 1) ||
208 s
->blockSize100k
> (BZ_HDR_0
+ 9)) RETURN(BZ_DATA_ERROR_MAGIC
);
209 s
->blockSize100k
-= BZ_HDR_0
;
211 if (s
->smallDecompress
) {
212 s
->ll16
= BZALLOC( s
->blockSize100k
* 100000 * sizeof(UInt16
) );
214 ((1 + s
->blockSize100k
* 100000) >> 1) * sizeof(UChar
)
216 if (s
->ll16
== NULL
|| s
->ll4
== NULL
) RETURN(BZ_MEM_ERROR
);
218 s
->tt
= BZALLOC( s
->blockSize100k
* 100000 * sizeof(Int32
) );
219 if (s
->tt
== NULL
) RETURN(BZ_MEM_ERROR
);
222 GET_UCHAR(BZ_X_BLKHDR_1
, uc
);
224 if (uc
== 0x17) goto endhdr_2
;
225 if (uc
!= 0x31) RETURN(BZ_DATA_ERROR
);
226 GET_UCHAR(BZ_X_BLKHDR_2
, uc
);
227 if (uc
!= 0x41) RETURN(BZ_DATA_ERROR
);
228 GET_UCHAR(BZ_X_BLKHDR_3
, uc
);
229 if (uc
!= 0x59) RETURN(BZ_DATA_ERROR
);
230 GET_UCHAR(BZ_X_BLKHDR_4
, uc
);
231 if (uc
!= 0x26) RETURN(BZ_DATA_ERROR
);
232 GET_UCHAR(BZ_X_BLKHDR_5
, uc
);
233 if (uc
!= 0x53) RETURN(BZ_DATA_ERROR
);
234 GET_UCHAR(BZ_X_BLKHDR_6
, uc
);
235 if (uc
!= 0x59) RETURN(BZ_DATA_ERROR
);
238 if (s
->verbosity
>= 2)
239 VPrintf1 ( "\n [%d: huff+mtf ", s
->currBlockNo
);
241 s
->storedBlockCRC
= 0;
242 GET_UCHAR(BZ_X_BCRC_1
, uc
);
243 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
244 GET_UCHAR(BZ_X_BCRC_2
, uc
);
245 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
246 GET_UCHAR(BZ_X_BCRC_3
, uc
);
247 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
248 GET_UCHAR(BZ_X_BCRC_4
, uc
);
249 s
->storedBlockCRC
= (s
->storedBlockCRC
<< 8) | ((UInt32
)uc
);
251 GET_BITS(BZ_X_RANDBIT
, s
->blockRandomised
, 1);
254 GET_UCHAR(BZ_X_ORIGPTR_1
, uc
);
255 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
256 GET_UCHAR(BZ_X_ORIGPTR_2
, uc
);
257 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
258 GET_UCHAR(BZ_X_ORIGPTR_3
, uc
);
259 s
->origPtr
= (s
->origPtr
<< 8) | ((Int32
)uc
);
262 RETURN(BZ_DATA_ERROR
);
263 if (s
->origPtr
> 10 + 100000*s
->blockSize100k
)
264 RETURN(BZ_DATA_ERROR
);
266 /*--- Receive the mapping table ---*/
267 for (i
= 0; i
< 16; i
++) {
268 GET_BIT(BZ_X_MAPPING_1
, uc
);
270 s
->inUse16
[i
] = True
; else
271 s
->inUse16
[i
] = False
;
274 for (i
= 0; i
< 256; i
++) s
->inUse
[i
] = False
;
276 for (i
= 0; i
< 16; i
++)
278 for (j
= 0; j
< 16; j
++) {
279 GET_BIT(BZ_X_MAPPING_2
, uc
);
280 if (uc
== 1) s
->inUse
[i
* 16 + j
] = True
;
283 if (s
->nInUse
== 0) RETURN(BZ_DATA_ERROR
);
284 alphaSize
= s
->nInUse
+2;
286 /*--- Now the selectors ---*/
287 GET_BITS(BZ_X_SELECTOR_1
, nGroups
, 3);
288 if (nGroups
< 2 || nGroups
> BZ_N_GROUPS
) RETURN(BZ_DATA_ERROR
);
289 GET_BITS(BZ_X_SELECTOR_2
, nSelectors
, 15);
290 if (nSelectors
< 1) RETURN(BZ_DATA_ERROR
);
291 for (i
= 0; i
< nSelectors
; i
++) {
294 GET_BIT(BZ_X_SELECTOR_3
, uc
);
297 if (j
>= nGroups
) RETURN(BZ_DATA_ERROR
);
299 /* Having more than BZ_MAX_SELECTORS doesn't make much sense
300 since they will never be used, but some implementations might
301 "round up" the number of selectors, so just ignore those. */
302 if (i
< BZ_MAX_SELECTORS
)
303 s
->selectorMtf
[i
] = j
;
305 if (nSelectors
> BZ_MAX_SELECTORS
)
306 nSelectors
= BZ_MAX_SELECTORS
;
308 /*--- Undo the MTF values for the selectors. ---*/
310 UChar pos
[BZ_N_GROUPS
], tmp
, v
;
311 for (v
= 0; v
< nGroups
; v
++) pos
[v
] = v
;
313 for (i
= 0; i
< nSelectors
; i
++) {
314 v
= s
->selectorMtf
[i
];
316 while (v
> 0) { pos
[v
] = pos
[v
-1]; v
--; }
318 s
->selector
[i
] = tmp
;
322 /*--- Now the coding tables ---*/
323 for (t
= 0; t
< nGroups
; t
++) {
324 GET_BITS(BZ_X_CODING_1
, curr
, 5);
325 for (i
= 0; i
< alphaSize
; i
++) {
327 if (curr
< 1 || curr
> 20) RETURN(BZ_DATA_ERROR
);
328 GET_BIT(BZ_X_CODING_2
, uc
);
330 GET_BIT(BZ_X_CODING_3
, uc
);
331 if (uc
== 0) curr
++; else curr
--;
337 /*--- Create the Huffman decoding tables ---*/
338 for (t
= 0; t
< nGroups
; t
++) {
341 for (i
= 0; i
< alphaSize
; i
++) {
342 if (s
->len
[t
][i
] > maxLen
) maxLen
= s
->len
[t
][i
];
343 if (s
->len
[t
][i
] < minLen
) minLen
= s
->len
[t
][i
];
345 BZ2_hbCreateDecodeTables (
350 minLen
, maxLen
, alphaSize
352 s
->minLens
[t
] = minLen
;
355 /*--- Now the MTF values ---*/
358 nblockMAX
= 100000 * s
->blockSize100k
;
362 for (i
= 0; i
<= 255; i
++) s
->unzftab
[i
] = 0;
368 for (ii
= 256 / MTFL_SIZE
- 1; ii
>= 0; ii
--) {
369 for (jj
= MTFL_SIZE
-1; jj
>= 0; jj
--) {
370 s
->mtfa
[kk
] = (UChar
)(ii
* MTFL_SIZE
+ jj
);
373 s
->mtfbase
[ii
] = kk
+ 1;
376 /*-- end MTF init --*/
379 GET_MTF_VAL(BZ_X_MTF_1
, BZ_X_MTF_2
, nextSym
);
383 if (nextSym
== EOB
) break;
385 if (nextSym
== BZ_RUNA
|| nextSym
== BZ_RUNB
) {
390 /* Check that N doesn't get too big, so that es doesn't
391 go negative. The maximum value that can be
392 RUNA/RUNB encoded is equal to the block size (post
393 the initial RLE), viz, 900k, so bounding N at 2
394 million should guard against overflow without
395 rejecting any legitimate inputs. */
396 if (N
>= 2*1024*1024) RETURN(BZ_DATA_ERROR
);
397 if (nextSym
== BZ_RUNA
) es
= es
+ (0+1) * N
; else
398 if (nextSym
== BZ_RUNB
) es
= es
+ (1+1) * N
;
400 GET_MTF_VAL(BZ_X_MTF_3
, BZ_X_MTF_4
, nextSym
);
402 while (nextSym
== BZ_RUNA
|| nextSym
== BZ_RUNB
);
405 uc
= s
->seqToUnseq
[ s
->mtfa
[s
->mtfbase
[0]] ];
406 s
->unzftab
[uc
] += es
;
408 if (s
->smallDecompress
)
410 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
411 s
->ll16
[nblock
] = (UInt16
)uc
;
417 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
418 s
->tt
[nblock
] = (UInt32
)uc
;
427 if (nblock
>= nblockMAX
) RETURN(BZ_DATA_ERROR
);
429 /*-- uc = MTF ( nextSym-1 ) --*/
431 Int32 ii
, jj
, kk
, pp
, lno
, off
;
433 nn
= (UInt32
)(nextSym
- 1);
435 if (nn
< MTFL_SIZE
) {
436 /* avoid general-case expense */
441 s
->mtfa
[(z
) ] = s
->mtfa
[(z
)-1];
442 s
->mtfa
[(z
)-1] = s
->mtfa
[(z
)-2];
443 s
->mtfa
[(z
)-2] = s
->mtfa
[(z
)-3];
444 s
->mtfa
[(z
)-3] = s
->mtfa
[(z
)-4];
448 s
->mtfa
[(pp
+nn
)] = s
->mtfa
[(pp
+nn
)-1]; nn
--;
453 lno
= nn
/ MTFL_SIZE
;
454 off
= nn
% MTFL_SIZE
;
455 pp
= s
->mtfbase
[lno
] + off
;
457 while (pp
> s
->mtfbase
[lno
]) {
458 s
->mtfa
[pp
] = s
->mtfa
[pp
-1]; pp
--;
463 s
->mtfa
[s
->mtfbase
[lno
]]
464 = s
->mtfa
[s
->mtfbase
[lno
-1] + MTFL_SIZE
- 1];
468 s
->mtfa
[s
->mtfbase
[0]] = uc
;
469 if (s
->mtfbase
[0] == 0) {
471 for (ii
= 256 / MTFL_SIZE
-1; ii
>= 0; ii
--) {
472 for (jj
= MTFL_SIZE
-1; jj
>= 0; jj
--) {
473 s
->mtfa
[kk
] = s
->mtfa
[s
->mtfbase
[ii
] + jj
];
476 s
->mtfbase
[ii
] = kk
+ 1;
481 /*-- end uc = MTF ( nextSym-1 ) --*/
483 s
->unzftab
[s
->seqToUnseq
[uc
]]++;
484 if (s
->smallDecompress
)
485 s
->ll16
[nblock
] = (UInt16
)(s
->seqToUnseq
[uc
]); else
486 s
->tt
[nblock
] = (UInt32
)(s
->seqToUnseq
[uc
]);
489 GET_MTF_VAL(BZ_X_MTF_5
, BZ_X_MTF_6
, nextSym
);
494 /* Now we know what nblock is, we can do a better sanity
497 if (s
->origPtr
< 0 || s
->origPtr
>= nblock
)
498 RETURN(BZ_DATA_ERROR
);
500 /*-- Set up cftab to facilitate generation of T^(-1) --*/
501 /* Check: unzftab entries in range. */
502 for (i
= 0; i
<= 255; i
++) {
503 if (s
->unzftab
[i
] < 0 || s
->unzftab
[i
] > nblock
)
504 RETURN(BZ_DATA_ERROR
);
506 /* Actually generate cftab. */
508 for (i
= 1; i
<= 256; i
++) s
->cftab
[i
] = s
->unzftab
[i
-1];
509 for (i
= 1; i
<= 256; i
++) s
->cftab
[i
] += s
->cftab
[i
-1];
510 /* Check: cftab entries in range. */
511 for (i
= 0; i
<= 256; i
++) {
512 if (s
->cftab
[i
] < 0 || s
->cftab
[i
] > nblock
) {
513 /* s->cftab[i] can legitimately be == nblock */
514 RETURN(BZ_DATA_ERROR
);
517 /* Check: cftab entries non-descending. */
518 for (i
= 1; i
<= 256; i
++) {
519 if (s
->cftab
[i
-1] > s
->cftab
[i
]) {
520 RETURN(BZ_DATA_ERROR
);
524 s
->state_out_len
= 0;
526 BZ_INITIALISE_CRC ( s
->calculatedBlockCRC
);
527 s
->state
= BZ_X_OUTPUT
;
528 if (s
->verbosity
>= 2) VPrintf0 ( "rt+rld" );
530 if (s
->smallDecompress
) {
532 /*-- Make a copy of cftab, used in generation of T --*/
533 for (i
= 0; i
<= 256; i
++) s
->cftabCopy
[i
] = s
->cftab
[i
];
535 /*-- compute the T vector --*/
536 for (i
= 0; i
< nblock
; i
++) {
537 uc
= (UChar
)(s
->ll16
[i
]);
538 SET_LL(i
, s
->cftabCopy
[uc
]);
542 /*-- Compute T^(-1) by pointer reversal on T --*/
546 Int32 tmp
= GET_LL(j
);
551 while (i
!= s
->origPtr
);
553 s
->tPos
= s
->origPtr
;
555 if (s
->blockRandomised
) {
557 BZ_GET_SMALL(s
->k0
); s
->nblock_used
++;
558 BZ_RAND_UPD_MASK
; s
->k0
^= BZ_RAND_MASK
;
560 BZ_GET_SMALL(s
->k0
); s
->nblock_used
++;
565 /*-- compute the T^(-1) vector --*/
566 for (i
= 0; i
< nblock
; i
++) {
567 uc
= (UChar
)(s
->tt
[i
] & 0xff);
568 s
->tt
[s
->cftab
[uc
]] |= (i
<< 8);
572 s
->tPos
= s
->tt
[s
->origPtr
] >> 8;
574 if (s
->blockRandomised
) {
576 BZ_GET_FAST(s
->k0
); s
->nblock_used
++;
577 BZ_RAND_UPD_MASK
; s
->k0
^= BZ_RAND_MASK
;
579 BZ_GET_FAST(s
->k0
); s
->nblock_used
++;
590 GET_UCHAR(BZ_X_ENDHDR_2
, uc
);
591 if (uc
!= 0x72) RETURN(BZ_DATA_ERROR
);
592 GET_UCHAR(BZ_X_ENDHDR_3
, uc
);
593 if (uc
!= 0x45) RETURN(BZ_DATA_ERROR
);
594 GET_UCHAR(BZ_X_ENDHDR_4
, uc
);
595 if (uc
!= 0x38) RETURN(BZ_DATA_ERROR
);
596 GET_UCHAR(BZ_X_ENDHDR_5
, uc
);
597 if (uc
!= 0x50) RETURN(BZ_DATA_ERROR
);
598 GET_UCHAR(BZ_X_ENDHDR_6
, uc
);
599 if (uc
!= 0x90) RETURN(BZ_DATA_ERROR
);
601 s
->storedCombinedCRC
= 0;
602 GET_UCHAR(BZ_X_CCRC_1
, uc
);
603 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
604 GET_UCHAR(BZ_X_CCRC_2
, uc
);
605 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
606 GET_UCHAR(BZ_X_CCRC_3
, uc
);
607 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
608 GET_UCHAR(BZ_X_CCRC_4
, uc
);
609 s
->storedCombinedCRC
= (s
->storedCombinedCRC
<< 8) | ((UInt32
)uc
);
611 s
->state
= BZ_X_IDLE
;
612 RETURN(BZ_STREAM_END
);
614 default: AssertH ( False
, 4001 );
617 AssertH ( False
, 4002 );
619 save_state_and_return
:
624 s
->save_alphaSize
= alphaSize
;
625 s
->save_nGroups
= nGroups
;
626 s
->save_nSelectors
= nSelectors
;
628 s
->save_groupNo
= groupNo
;
629 s
->save_groupPos
= groupPos
;
630 s
->save_nextSym
= nextSym
;
631 s
->save_nblockMAX
= nblockMAX
;
632 s
->save_nblock
= nblock
;
641 s
->save_gMinlen
= gMinlen
;
642 s
->save_gLimit
= gLimit
;
643 s
->save_gBase
= gBase
;
644 s
->save_gPerm
= gPerm
;
650 /*-------------------------------------------------------------*/
651 /*--- end decompress.c ---*/
652 /*-------------------------------------------------------------*/