2 /*-------------------------------------------------------------*/
3 /*--- Block sorting machinery ---*/
4 /*--- blocksort.c ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
22 typedef unsigned char Bool
;
23 typedef unsigned char UChar
;
24 #if __SIZEOF_INT__ == 2
26 typedef unsigned long UInt32
;
29 typedef unsigned int UInt32
;
32 typedef unsigned short UInt16
;
34 #define True ((Bool)1)
35 #define False ((Bool)0)
38 #define BZ_M_RUNNING 2
39 #define BZ_M_FLUSHING 3
40 #define BZ_M_FINISHING 4
48 #define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
50 /*---------------------------------------------*/
51 /*--- Fallback O(N log(N)^2) sorting ---*/
52 /*--- algorithm, for repetitive blocks ---*/
53 /*---------------------------------------------*/
55 /*---------------------------------------------*/
56 void fallbackSimpleSort ( UInt32
* fmap
,
67 for ( i
= hi
-4; i
>= lo
; i
-- ) {
70 for ( j
= i
+4; j
<= hi
&& ec_tmp
> eclass
[fmap
[j
]]; j
+= 4 )
76 for ( i
= hi
-1; i
>= lo
; i
-- ) {
79 for ( j
= i
+1; j
<= hi
&& ec_tmp
> eclass
[fmap
[j
]]; j
++ )
86 /*---------------------------------------------*/
87 #define fswap(zz1, zz2) \
88 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
90 #define fvswap(zzp1, zzp2, zzn) \
92 Int32 yyp1 = (zzp1); \
93 Int32 yyp2 = (zzp2); \
96 fswap(fmap[yyp1], fmap[yyp2]); \
97 yyp1++; yyp2++; yyn--; \
102 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
104 #define fpush(lz,hz) { stackLo[sp] = lz; \
108 #define fpop(lz,hz) { sp--; \
112 #define FALLBACK_QSORT_SMALL_THRESH 10
113 #define FALLBACK_QSORT_STACK_SIZE 100
116 void fallbackQSort3 ( UInt32
* fmap
,
121 Int32 unLo
, unHi
, ltLo
, gtHi
, n
, m
;
124 Int32 stackLo
[FALLBACK_QSORT_STACK_SIZE
];
125 Int32 stackHi
[FALLBACK_QSORT_STACK_SIZE
];
130 fpush ( loSt
, hiSt
);
135 if (hi
- lo
< FALLBACK_QSORT_SMALL_THRESH
) {
136 fallbackSimpleSort ( fmap
, eclass
, lo
, hi
);
140 /* Random partitioning. Median of 3 sometimes fails to
141 avoid bad cases. Median of 9 seems to help but
142 looks rather expensive. This too seems to work but
143 is cheaper. Guidance for the magic constants
144 7621 and 32768 is taken from Sedgewick's algorithms
147 r
= ((r
* 7621) + 1) % 32768;
149 if (r3
== 0) med
= eclass
[fmap
[lo
]]; else
150 if (r3
== 1) med
= eclass
[fmap
[(lo
+hi
)>>1]]; else
151 med
= eclass
[fmap
[hi
]];
158 if (unLo
> unHi
) break;
159 n
= (Int32
)eclass
[fmap
[unLo
]] - (Int32
)med
;
161 fswap(fmap
[unLo
], fmap
[ltLo
]);
169 if (unLo
> unHi
) break;
170 n
= (Int32
)eclass
[fmap
[unHi
]] - (Int32
)med
;
172 fswap(fmap
[unHi
], fmap
[gtHi
]);
179 if (unLo
> unHi
) break;
180 fswap(fmap
[unLo
], fmap
[unHi
]); unLo
++; unHi
--;
183 if (gtHi
< ltLo
) continue;
185 n
= fmin(ltLo
-lo
, unLo
-ltLo
); fvswap(lo
, unLo
-n
, n
);
186 m
= fmin(hi
-gtHi
, gtHi
-unHi
); fvswap(unLo
, hi
-m
+1, m
);
188 n
= lo
+ unLo
- ltLo
- 1;
189 m
= hi
- (gtHi
- unHi
) + 1;
191 if (n
- lo
> hi
- m
) {
206 #undef FALLBACK_QSORT_SMALL_THRESH
207 #undef FALLBACK_QSORT_STACK_SIZE
210 /*---------------------------------------------*/
213 eclass exists for [0 .. nblock-1]
214 ((UChar*)eclass) [0 .. nblock-1] holds block
215 ptr exists for [0 .. nblock-1]
218 ((UChar*)eclass) [0 .. nblock-1] holds block
219 All other areas of eclass destroyed
220 fmap [0 .. nblock-1] holds sorted order
221 bhtab [ 0 .. 2+(nblock/32) ] destroyed
224 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
225 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
226 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
227 #define WORD_BH(zz) bhtab[(zz) >> 5]
228 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
230 void fallbackSort ( UInt32
* fmap
,
238 Int32 H
, i
, j
, k
, l
, r
, cc
, cc1
;
241 UChar
* eclass8
= (UChar
*)eclass
;
244 Initial 1-char radix sort to generate
245 initial fmap and initial BH bits.
247 for (i
= 0; i
< 257; i
++) ftab
[i
] = 0;
248 for (i
= 0; i
< nblock
; i
++) ftab
[eclass8
[i
]]++;
249 for (i
= 0; i
< 256; i
++) ftabCopy
[i
] = ftab
[i
];
250 for (i
= 1; i
< 257; i
++) ftab
[i
] += ftab
[i
-1];
252 for (i
= 0; i
< nblock
; i
++) {
259 nBhtab
= 2 + (nblock
/ 32);
260 for (i
= 0; i
< nBhtab
; i
++) bhtab
[i
] = 0;
261 for (i
= 0; i
< 256; i
++) SET_BH(ftab
[i
]);
264 Inductively refine the buckets. Kind-of an
265 "exponential radix sort" (!), inspired by the
266 Manber-Myers suffix array construction algorithm.
269 /*-- set sentinel bits for block-end detection --*/
270 for (i
= 0; i
< 32; i
++) {
271 SET_BH(nblock
+ 2*i
);
272 CLEAR_BH(nblock
+ 2*i
+ 1);
275 /*-- the log(N) loop --*/
281 for (i
= 0; i
< nblock
; i
++) {
282 if (ISSET_BH(i
)) j
= i
;
283 k
= fmap
[i
] - H
; if (k
< 0) k
+= nblock
;
291 /*-- find the next non-singleton bucket --*/
293 while (ISSET_BH(k
) && UNALIGNED_BH(k
)) k
++;
295 while (WORD_BH(k
) == 0xffffffff) k
+= 32;
296 while (ISSET_BH(k
)) k
++;
299 if (l
>= nblock
) break;
300 while (!ISSET_BH(k
) && UNALIGNED_BH(k
)) k
++;
302 while (WORD_BH(k
) == 0x00000000) k
+= 32;
303 while (!ISSET_BH(k
)) k
++;
306 if (r
>= nblock
) break;
308 /*-- now [l, r] bracket current bucket --*/
310 nNotDone
+= (r
- l
+ 1);
311 fallbackQSort3 ( fmap
, eclass
, l
, r
);
313 /*-- scan bucket and generate header bits-- */
315 for (i
= l
; i
<= r
; i
++) {
316 cc1
= eclass
[fmap
[i
]];
317 if (cc
!= cc1
) { SET_BH(i
); cc
= cc1
; };
323 if (H
> nblock
|| nNotDone
== 0) break;
327 Reconstruct the original block in
328 eclass8 [0 .. nblock-1], since the
329 previous phase destroyed it.
332 for (i
= 0; i
< nblock
; i
++) {
333 while (ftabCopy
[j
] == 0) j
++;
335 eclass8
[fmap
[i
]] = (UChar
)j
;
346 /*---------------------------------------------*/
347 /*--- The main, O(N^2 log(N)) sorting ---*/
348 /*--- algorithm. Faster for "normal" ---*/
349 /*--- non-repetitive blocks. ---*/
350 /*---------------------------------------------*/
352 /*---------------------------------------------*/
353 Bool
mainGtU ( UInt32 i1
,
365 c1
= block
[i1
]; c2
= block
[i2
];
366 if (c1
!= c2
) return (c1
> c2
);
369 c1
= block
[i1
]; c2
= block
[i2
];
370 if (c1
!= c2
) return (c1
> c2
);
373 c1
= block
[i1
]; c2
= block
[i2
];
374 if (c1
!= c2
) return (c1
> c2
);
377 c1
= block
[i1
]; c2
= block
[i2
];
378 if (c1
!= c2
) return (c1
> c2
);
381 c1
= block
[i1
]; c2
= block
[i2
];
382 if (c1
!= c2
) return (c1
> c2
);
385 c1
= block
[i1
]; c2
= block
[i2
];
386 if (c1
!= c2
) return (c1
> c2
);
389 c1
= block
[i1
]; c2
= block
[i2
];
390 if (c1
!= c2
) return (c1
> c2
);
393 c1
= block
[i1
]; c2
= block
[i2
];
394 if (c1
!= c2
) return (c1
> c2
);
397 c1
= block
[i1
]; c2
= block
[i2
];
398 if (c1
!= c2
) return (c1
> c2
);
401 c1
= block
[i1
]; c2
= block
[i2
];
402 if (c1
!= c2
) return (c1
> c2
);
405 c1
= block
[i1
]; c2
= block
[i2
];
406 if (c1
!= c2
) return (c1
> c2
);
409 c1
= block
[i1
]; c2
= block
[i2
];
410 if (c1
!= c2
) return (c1
> c2
);
417 c1
= block
[i1
]; c2
= block
[i2
];
418 if (c1
!= c2
) return (c1
> c2
);
419 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
420 if (s1
!= s2
) return (s1
> s2
);
423 c1
= block
[i1
]; c2
= block
[i2
];
424 if (c1
!= c2
) return (c1
> c2
);
425 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
426 if (s1
!= s2
) return (s1
> s2
);
429 c1
= block
[i1
]; c2
= block
[i2
];
430 if (c1
!= c2
) return (c1
> c2
);
431 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
432 if (s1
!= s2
) return (s1
> s2
);
435 c1
= block
[i1
]; c2
= block
[i2
];
436 if (c1
!= c2
) return (c1
> c2
);
437 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
438 if (s1
!= s2
) return (s1
> s2
);
441 c1
= block
[i1
]; c2
= block
[i2
];
442 if (c1
!= c2
) return (c1
> c2
);
443 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
444 if (s1
!= s2
) return (s1
> s2
);
447 c1
= block
[i1
]; c2
= block
[i2
];
448 if (c1
!= c2
) return (c1
> c2
);
449 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
450 if (s1
!= s2
) return (s1
> s2
);
453 c1
= block
[i1
]; c2
= block
[i2
];
454 if (c1
!= c2
) return (c1
> c2
);
455 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
456 if (s1
!= s2
) return (s1
> s2
);
459 c1
= block
[i1
]; c2
= block
[i2
];
460 if (c1
!= c2
) return (c1
> c2
);
461 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
462 if (s1
!= s2
) return (s1
> s2
);
465 if (i1
>= nblock
) i1
-= nblock
;
466 if (i2
>= nblock
) i2
-= nblock
;
477 /*---------------------------------------------*/
479 Knuth's increments seem to work better
480 than Incerpi-Sedgewick here. Possibly
481 because the number of elems to sort is
482 usually small, typically <= 20.
484 Int32 incs
[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
485 9841, 29524, 88573, 265720,
488 void mainSimpleSort ( UInt32
* ptr
,
497 Int32 i
, j
, h
, bigN
, hp
;
501 if (bigN
< 2) return;
504 while (incs
[hp
] < bigN
) hp
++;
507 for (; hp
>= 0; hp
--) {
518 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
522 if (j
<= (lo
+ h
- 1)) break;
532 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
536 if (j
<= (lo
+ h
- 1)) break;
546 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
550 if (j
<= (lo
+ h
- 1)) break;
555 if (*budget
< 0) return;
561 /*---------------------------------------------*/
563 The following is an implementation of
564 an elegant 3-way quicksort for strings,
565 described in a paper "Fast Algorithms for
566 Sorting and Searching Strings", by Robert
567 Sedgewick and Jon L. Bentley.
570 #define mswap(zz1, zz2) \
571 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
573 #define mvswap(zzp1, zzp2, zzn) \
575 Int32 yyp1 = (zzp1); \
576 Int32 yyp2 = (zzp2); \
579 mswap(ptr[yyp1], ptr[yyp2]); \
580 yyp1++; yyp2++; yyn--; \
584 UChar
mmed3 ( UChar a
, UChar b
, UChar c
)
587 if (a
> b
) { t
= a
; a
= b
; b
= t
; };
595 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
597 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
602 #define mpop(lz,hz,dz) { sp--; \
608 #define mnextsize(az) (nextHi[az]-nextLo[az])
610 #define mnextswap(az,bz) \
612 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
613 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
614 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
617 #define MAIN_QSORT_SMALL_THRESH 20
618 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
619 #define MAIN_QSORT_STACK_SIZE 100
621 void mainQSort3 ( UInt32
* ptr
,
630 Int32 unLo
, unHi
, ltLo
, gtHi
, n
, m
, med
;
633 Int32 stackLo
[MAIN_QSORT_STACK_SIZE
];
634 Int32 stackHi
[MAIN_QSORT_STACK_SIZE
];
635 Int32 stackD
[MAIN_QSORT_STACK_SIZE
];
642 mpush ( loSt
, hiSt
, dSt
);
647 if (hi
- lo
< MAIN_QSORT_SMALL_THRESH
||
648 d
> MAIN_QSORT_DEPTH_THRESH
) {
649 mainSimpleSort ( ptr
, block
, quadrant
, nblock
, lo
, hi
, d
, budget
);
650 if (*budget
< 0) return;
655 mmed3 ( block
[ptr
[ lo
]+d
],
657 block
[ptr
[ (lo
+hi
)>>1 ]+d
] );
664 if (unLo
> unHi
) break;
665 n
= ((Int32
)block
[ptr
[unLo
]+d
]) - med
;
667 mswap(ptr
[unLo
], ptr
[ltLo
]);
668 ltLo
++; unLo
++; continue;
674 if (unLo
> unHi
) break;
675 n
= ((Int32
)block
[ptr
[unHi
]+d
]) - med
;
677 mswap(ptr
[unHi
], ptr
[gtHi
]);
678 gtHi
--; unHi
--; continue;
683 if (unLo
> unHi
) break;
684 mswap(ptr
[unLo
], ptr
[unHi
]); unLo
++; unHi
--;
692 n
= mmin(ltLo
-lo
, unLo
-ltLo
); mvswap(lo
, unLo
-n
, n
);
693 m
= mmin(hi
-gtHi
, gtHi
-unHi
); mvswap(unLo
, hi
-m
+1, m
);
695 n
= lo
+ unLo
- ltLo
- 1;
696 m
= hi
- (gtHi
- unHi
) + 1;
698 nextLo
[0] = lo
; nextHi
[0] = n
; nextD
[0] = d
;
699 nextLo
[1] = m
; nextHi
[1] = hi
; nextD
[1] = d
;
700 nextLo
[2] = n
+1; nextHi
[2] = m
-1; nextD
[2] = d
+1;
702 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
703 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
704 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707 mpush (nextLo
[0], nextHi
[0], nextD
[0]);
708 mpush (nextLo
[1], nextHi
[1], nextD
[1]);
709 mpush (nextLo
[2], nextHi
[2], nextD
[2]);