2 /*-------------------------------------------------------------*/
3 /*--- Block sorting machinery ---*/
4 /*--- blocksort.c ---*/
5 /*-------------------------------------------------------------*/
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
11 bzip2/libbzip2 version 1.0.6 of 6 September 2010
12 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
17 This program is released under the terms of the license contained
19 ------------------------------------------------------------------ */
22 #include "bzlib_private.h"
24 /*---------------------------------------------*/
25 /*--- Fallback O(N log(N)^2) sorting ---*/
26 /*--- algorithm, for repetitive blocks ---*/
27 /*---------------------------------------------*/
29 /*---------------------------------------------*/
32 void fallbackSimpleSort ( UInt32
* fmap
,
43 for ( i
= hi
-4; i
>= lo
; i
-- ) {
46 for ( j
= i
+4; j
<= hi
&& ec_tmp
> eclass
[fmap
[j
]]; j
+= 4 )
52 for ( i
= hi
-1; i
>= lo
; i
-- ) {
55 for ( j
= i
+1; j
<= hi
&& ec_tmp
> eclass
[fmap
[j
]]; j
++ )
62 /*---------------------------------------------*/
63 #define fswap(zz1, zz2) \
64 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
66 #define fvswap(zzp1, zzp2, zzn) \
68 Int32 yyp1 = (zzp1); \
69 Int32 yyp2 = (zzp2); \
72 fswap(fmap[yyp1], fmap[yyp2]); \
73 yyp1++; yyp2++; yyn--; \
78 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
80 #define fpush(lz,hz) { stackLo[sp] = lz; \
84 #define fpop(lz,hz) { sp--; \
88 #define FALLBACK_QSORT_SMALL_THRESH 10
89 #define FALLBACK_QSORT_STACK_SIZE 100
93 void fallbackQSort3 ( UInt32
* fmap
,
98 Int32 unLo
, unHi
, ltLo
, gtHi
, n
, m
;
101 Int32 stackLo
[FALLBACK_QSORT_STACK_SIZE
];
102 Int32 stackHi
[FALLBACK_QSORT_STACK_SIZE
];
107 fpush ( loSt
, hiSt
);
111 AssertH ( sp
< FALLBACK_QSORT_STACK_SIZE
- 1, 1004 );
114 if (hi
- lo
< FALLBACK_QSORT_SMALL_THRESH
) {
115 fallbackSimpleSort ( fmap
, eclass
, lo
, hi
);
119 /* Random partitioning. Median of 3 sometimes fails to
120 avoid bad cases. Median of 9 seems to help but
121 looks rather expensive. This too seems to work but
122 is cheaper. Guidance for the magic constants
123 7621 and 32768 is taken from Sedgewick's algorithms
126 r
= ((r
* 7621) + 1) % 32768;
128 if (r3
== 0) med
= eclass
[fmap
[lo
]]; else
129 if (r3
== 1) med
= eclass
[fmap
[(lo
+hi
)>>1]]; else
130 med
= eclass
[fmap
[hi
]];
137 if (unLo
> unHi
) break;
138 n
= (Int32
)eclass
[fmap
[unLo
]] - (Int32
)med
;
140 fswap(fmap
[unLo
], fmap
[ltLo
]);
148 if (unLo
> unHi
) break;
149 n
= (Int32
)eclass
[fmap
[unHi
]] - (Int32
)med
;
151 fswap(fmap
[unHi
], fmap
[gtHi
]);
158 if (unLo
> unHi
) break;
159 fswap(fmap
[unLo
], fmap
[unHi
]); unLo
++; unHi
--;
162 AssertD ( unHi
== unLo
-1, "fallbackQSort3(2)" );
164 if (gtHi
< ltLo
) continue;
166 n
= fmin(ltLo
-lo
, unLo
-ltLo
); fvswap(lo
, unLo
-n
, n
);
167 m
= fmin(hi
-gtHi
, gtHi
-unHi
); fvswap(unLo
, hi
-m
+1, m
);
169 n
= lo
+ unLo
- ltLo
- 1;
170 m
= hi
- (gtHi
- unHi
) + 1;
172 if (n
- lo
> hi
- m
) {
187 #undef FALLBACK_QSORT_SMALL_THRESH
188 #undef FALLBACK_QSORT_STACK_SIZE
191 /*---------------------------------------------*/
194 eclass exists for [0 .. nblock-1]
195 ((UChar*)eclass) [0 .. nblock-1] holds block
196 ptr exists for [0 .. nblock-1]
199 ((UChar*)eclass) [0 .. nblock-1] holds block
200 All other areas of eclass destroyed
201 fmap [0 .. nblock-1] holds sorted order
202 bhtab [ 0 .. 2+(nblock/32) ] destroyed
205 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
206 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
207 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
208 #define WORD_BH(zz) bhtab[(zz) >> 5]
209 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
212 void fallbackSort ( UInt32
* fmap
,
220 Int32 H
, i
, j
, k
, l
, r
, cc
, cc1
;
223 UChar
* eclass8
= (UChar
*)eclass
;
226 Initial 1-char radix sort to generate
227 initial fmap and initial BH bits.
230 VPrintf0 ( " bucket sorting ...\n" );
231 for (i
= 0; i
< 257; i
++) ftab
[i
] = 0;
232 for (i
= 0; i
< nblock
; i
++) ftab
[eclass8
[i
]]++;
233 for (i
= 0; i
< 256; i
++) ftabCopy
[i
] = ftab
[i
];
234 for (i
= 1; i
< 257; i
++) ftab
[i
] += ftab
[i
-1];
236 for (i
= 0; i
< nblock
; i
++) {
243 nBhtab
= 2 + (nblock
/ 32);
244 for (i
= 0; i
< nBhtab
; i
++) bhtab
[i
] = 0;
245 for (i
= 0; i
< 256; i
++) SET_BH(ftab
[i
]);
248 Inductively refine the buckets. Kind-of an
249 "exponential radix sort" (!), inspired by the
250 Manber-Myers suffix array construction algorithm.
253 /*-- set sentinel bits for block-end detection --*/
254 for (i
= 0; i
< 32; i
++) {
255 SET_BH(nblock
+ 2*i
);
256 CLEAR_BH(nblock
+ 2*i
+ 1);
259 /*-- the log(N) loop --*/
264 VPrintf1 ( " depth %6d has ", H
);
267 for (i
= 0; i
< nblock
; i
++) {
268 if (ISSET_BH(i
)) j
= i
;
269 k
= fmap
[i
] - H
; if (k
< 0) k
+= nblock
;
277 /*-- find the next non-singleton bucket --*/
279 while (ISSET_BH(k
) && UNALIGNED_BH(k
)) k
++;
281 while (WORD_BH(k
) == 0xffffffff) k
+= 32;
282 while (ISSET_BH(k
)) k
++;
285 if (l
>= nblock
) break;
286 while (!ISSET_BH(k
) && UNALIGNED_BH(k
)) k
++;
288 while (WORD_BH(k
) == 0x00000000) k
+= 32;
289 while (!ISSET_BH(k
)) k
++;
292 if (r
>= nblock
) break;
294 /*-- now [l, r] bracket current bucket --*/
296 nNotDone
+= (r
- l
+ 1);
297 fallbackQSort3 ( fmap
, eclass
, l
, r
);
299 /*-- scan bucket and generate header bits-- */
301 for (i
= l
; i
<= r
; i
++) {
302 cc1
= eclass
[fmap
[i
]];
303 if (cc
!= cc1
) { SET_BH(i
); cc
= cc1
; };
309 VPrintf1 ( "%6d unresolved strings\n", nNotDone
);
312 if (H
> nblock
|| nNotDone
== 0) break;
316 Reconstruct the original block in
317 eclass8 [0 .. nblock-1], since the
318 previous phase destroyed it.
321 VPrintf0 ( " reconstructing block ...\n" );
323 for (i
= 0; i
< nblock
; i
++) {
324 while (ftabCopy
[j
] == 0) j
++;
326 eclass8
[fmap
[i
]] = (UChar
)j
;
328 AssertH ( j
< 256, 1005 );
338 /*---------------------------------------------*/
339 /*--- The main, O(N^2 log(N)) sorting ---*/
340 /*--- algorithm. Faster for "normal" ---*/
341 /*--- non-repetitive blocks. ---*/
342 /*---------------------------------------------*/
344 /*---------------------------------------------*/
347 Bool
mainGtU ( UInt32 i1
,
358 AssertD ( i1
!= i2
, "mainGtU" );
360 c1
= block
[i1
]; c2
= block
[i2
];
361 if (c1
!= c2
) return (c1
> c2
);
364 c1
= block
[i1
]; c2
= block
[i2
];
365 if (c1
!= c2
) return (c1
> c2
);
368 c1
= block
[i1
]; c2
= block
[i2
];
369 if (c1
!= c2
) return (c1
> c2
);
372 c1
= block
[i1
]; c2
= block
[i2
];
373 if (c1
!= c2
) return (c1
> c2
);
376 c1
= block
[i1
]; c2
= block
[i2
];
377 if (c1
!= c2
) return (c1
> c2
);
380 c1
= block
[i1
]; c2
= block
[i2
];
381 if (c1
!= c2
) return (c1
> c2
);
384 c1
= block
[i1
]; c2
= block
[i2
];
385 if (c1
!= c2
) return (c1
> c2
);
388 c1
= block
[i1
]; c2
= block
[i2
];
389 if (c1
!= c2
) return (c1
> c2
);
392 c1
= block
[i1
]; c2
= block
[i2
];
393 if (c1
!= c2
) return (c1
> c2
);
396 c1
= block
[i1
]; c2
= block
[i2
];
397 if (c1
!= c2
) return (c1
> c2
);
400 c1
= block
[i1
]; c2
= block
[i2
];
401 if (c1
!= c2
) return (c1
> c2
);
404 c1
= block
[i1
]; c2
= block
[i2
];
405 if (c1
!= c2
) return (c1
> c2
);
412 c1
= block
[i1
]; c2
= block
[i2
];
413 if (c1
!= c2
) return (c1
> c2
);
414 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
415 if (s1
!= s2
) return (s1
> s2
);
418 c1
= block
[i1
]; c2
= block
[i2
];
419 if (c1
!= c2
) return (c1
> c2
);
420 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
421 if (s1
!= s2
) return (s1
> s2
);
424 c1
= block
[i1
]; c2
= block
[i2
];
425 if (c1
!= c2
) return (c1
> c2
);
426 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
427 if (s1
!= s2
) return (s1
> s2
);
430 c1
= block
[i1
]; c2
= block
[i2
];
431 if (c1
!= c2
) return (c1
> c2
);
432 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
433 if (s1
!= s2
) return (s1
> s2
);
436 c1
= block
[i1
]; c2
= block
[i2
];
437 if (c1
!= c2
) return (c1
> c2
);
438 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
439 if (s1
!= s2
) return (s1
> s2
);
442 c1
= block
[i1
]; c2
= block
[i2
];
443 if (c1
!= c2
) return (c1
> c2
);
444 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
445 if (s1
!= s2
) return (s1
> s2
);
448 c1
= block
[i1
]; c2
= block
[i2
];
449 if (c1
!= c2
) return (c1
> c2
);
450 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
451 if (s1
!= s2
) return (s1
> s2
);
454 c1
= block
[i1
]; c2
= block
[i2
];
455 if (c1
!= c2
) return (c1
> c2
);
456 s1
= quadrant
[i1
]; s2
= quadrant
[i2
];
457 if (s1
!= s2
) return (s1
> s2
);
460 if (i1
>= nblock
) i1
-= nblock
;
461 if (i2
>= nblock
) i2
-= nblock
;
472 /*---------------------------------------------*/
474 Knuth's increments seem to work better
475 than Incerpi-Sedgewick here. Possibly
476 because the number of elems to sort is
477 usually small, typically <= 20.
480 Int32 incs
[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481 9841, 29524, 88573, 265720,
485 void mainSimpleSort ( UInt32
* ptr
,
494 Int32 i
, j
, h
, bigN
, hp
;
498 if (bigN
< 2) return;
501 while (incs
[hp
] < bigN
) hp
++;
504 for (; hp
>= 0; hp
--) {
515 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
519 if (j
<= (lo
+ h
- 1)) break;
529 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
533 if (j
<= (lo
+ h
- 1)) break;
543 ptr
[j
-h
]+d
, v
+d
, block
, quadrant
, nblock
, budget
547 if (j
<= (lo
+ h
- 1)) break;
552 if (*budget
< 0) return;
558 /*---------------------------------------------*/
560 The following is an implementation of
561 an elegant 3-way quicksort for strings,
562 described in a paper "Fast Algorithms for
563 Sorting and Searching Strings", by Robert
564 Sedgewick and Jon L. Bentley.
567 #define mswap(zz1, zz2) \
568 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
570 #define mvswap(zzp1, zzp2, zzn) \
572 Int32 yyp1 = (zzp1); \
573 Int32 yyp2 = (zzp2); \
576 mswap(ptr[yyp1], ptr[yyp2]); \
577 yyp1++; yyp2++; yyn--; \
583 UChar
mmed3 ( UChar a
, UChar b
, UChar c
)
586 if (a
> b
) { t
= a
; a
= b
; b
= t
; };
594 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
601 #define mpop(lz,hz,dz) { sp--; \
607 #define mnextsize(az) (nextHi[az]-nextLo[az])
609 #define mnextswap(az,bz) \
611 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
616 #define MAIN_QSORT_SMALL_THRESH 20
617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618 #define MAIN_QSORT_STACK_SIZE 100
621 void mainQSort3 ( UInt32
* ptr
,
630 Int32 unLo
, unHi
, ltLo
, gtHi
, n
, m
, med
;
633 Int32 stackLo
[MAIN_QSORT_STACK_SIZE
];
634 Int32 stackHi
[MAIN_QSORT_STACK_SIZE
];
635 Int32 stackD
[MAIN_QSORT_STACK_SIZE
];
642 mpush ( loSt
, hiSt
, dSt
);
646 AssertH ( sp
< MAIN_QSORT_STACK_SIZE
- 2, 1001 );
649 if (hi
- lo
< MAIN_QSORT_SMALL_THRESH
||
650 d
> MAIN_QSORT_DEPTH_THRESH
) {
651 mainSimpleSort ( ptr
, block
, quadrant
, nblock
, lo
, hi
, d
, budget
);
652 if (*budget
< 0) return;
657 mmed3 ( block
[ptr
[ lo
]+d
],
659 block
[ptr
[ (lo
+hi
)>>1 ]+d
] );
666 if (unLo
> unHi
) break;
667 n
= ((Int32
)block
[ptr
[unLo
]+d
]) - med
;
669 mswap(ptr
[unLo
], ptr
[ltLo
]);
670 ltLo
++; unLo
++; continue;
676 if (unLo
> unHi
) break;
677 n
= ((Int32
)block
[ptr
[unHi
]+d
]) - med
;
679 mswap(ptr
[unHi
], ptr
[gtHi
]);
680 gtHi
--; unHi
--; continue;
685 if (unLo
> unHi
) break;
686 mswap(ptr
[unLo
], ptr
[unHi
]); unLo
++; unHi
--;
689 AssertD ( unHi
== unLo
-1, "mainQSort3(2)" );
696 n
= mmin(ltLo
-lo
, unLo
-ltLo
); mvswap(lo
, unLo
-n
, n
);
697 m
= mmin(hi
-gtHi
, gtHi
-unHi
); mvswap(unLo
, hi
-m
+1, m
);
699 n
= lo
+ unLo
- ltLo
- 1;
700 m
= hi
- (gtHi
- unHi
) + 1;
702 nextLo
[0] = lo
; nextHi
[0] = n
; nextD
[0] = d
;
703 nextLo
[1] = m
; nextHi
[1] = hi
; nextD
[1] = d
;
704 nextLo
[2] = n
+1; nextHi
[2] = m
-1; nextD
[2] = d
+1;
706 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
710 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
713 mpush (nextLo
[0], nextHi
[0], nextD
[0]);
714 mpush (nextLo
[1], nextHi
[1], nextD
[1]);
715 mpush (nextLo
[2], nextHi
[2], nextD
[2]);
726 #undef MAIN_QSORT_SMALL_THRESH
727 #undef MAIN_QSORT_DEPTH_THRESH
728 #undef MAIN_QSORT_STACK_SIZE
731 /*---------------------------------------------*/
734 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735 ((UChar*)block32) [0 .. nblock-1] holds block
736 ptr exists for [0 .. nblock-1]
739 ((UChar*)block32) [0 .. nblock-1] holds block
740 All other areas of block32 destroyed
741 ftab [0 .. 65536 ] destroyed
742 ptr [0 .. nblock-1] holds sorted order
743 if (*budget < 0), sorting was abandoned
746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747 #define SETMASK (1 << 21)
748 #define CLEARMASK (~(SETMASK))
751 void mainSort ( UInt32
* ptr
,
759 Int32 i
, j
, k
, ss
, sb
;
760 Int32 runningOrder
[256];
762 Int32 copyStart
[256];
767 if (verb
>= 4) VPrintf0 ( " main sort initialise ...\n" );
769 /*-- set up the 2-byte frequency table --*/
770 for (i
= 65536; i
>= 0; i
--) ftab
[i
] = 0;
774 for (; i
>= 3; i
-= 4) {
776 j
= (j
>> 8) | ( ((UInt16
)block
[i
]) << 8);
779 j
= (j
>> 8) | ( ((UInt16
)block
[i
-1]) << 8);
782 j
= (j
>> 8) | ( ((UInt16
)block
[i
-2]) << 8);
785 j
= (j
>> 8) | ( ((UInt16
)block
[i
-3]) << 8);
788 for (; i
>= 0; i
--) {
790 j
= (j
>> 8) | ( ((UInt16
)block
[i
]) << 8);
794 /*-- (emphasises close relationship of block & quadrant) --*/
795 for (i
= 0; i
< BZ_N_OVERSHOOT
; i
++) {
796 block
[nblock
+i
] = block
[i
];
797 quadrant
[nblock
+i
] = 0;
800 if (verb
>= 4) VPrintf0 ( " bucket sorting ...\n" );
802 /*-- Complete the initial radix sort --*/
803 for (i
= 1; i
<= 65536; i
++) ftab
[i
] += ftab
[i
-1];
807 for (; i
>= 3; i
-= 4) {
808 s
= (s
>> 8) | (block
[i
] << 8);
812 s
= (s
>> 8) | (block
[i
-1] << 8);
816 s
= (s
>> 8) | (block
[i
-2] << 8);
820 s
= (s
>> 8) | (block
[i
-3] << 8);
825 for (; i
>= 0; i
--) {
826 s
= (s
>> 8) | (block
[i
] << 8);
833 Now ftab contains the first loc of every small bucket.
834 Calculate the running order, from smallest to largest
837 for (i
= 0; i
<= 255; i
++) {
845 do h
= 3 * h
+ 1; while (h
<= 256);
848 for (i
= h
; i
<= 255; i
++) {
849 vv
= runningOrder
[i
];
851 while ( BIGFREQ(runningOrder
[j
-h
]) > BIGFREQ(vv
) ) {
852 runningOrder
[j
] = runningOrder
[j
-h
];
854 if (j
<= (h
- 1)) goto zero
;
857 runningOrder
[j
] = vv
;
863 The main sorting loop.
868 for (i
= 0; i
<= 255; i
++) {
871 Process big buckets, starting with the least full.
872 Basically this is a 3-step process in which we call
873 mainQSort3 to sort the small buckets [ss, j], but
874 also make a big effort to avoid the calls if we can.
876 ss
= runningOrder
[i
];
880 Complete the big bucket [ss] by quicksorting
881 any unsorted small buckets [ss, j], for j != ss.
882 Hopefully previous pointer-scanning phases have already
883 completed many of the small buckets [ss, j], so
884 we don't have to sort them at all.
886 for (j
= 0; j
<= 255; j
++) {
889 if ( ! (ftab
[sb
] & SETMASK
) ) {
890 Int32 lo
= ftab
[sb
] & CLEARMASK
;
891 Int32 hi
= (ftab
[sb
+1] & CLEARMASK
) - 1;
894 VPrintf4 ( " qsort [0x%x, 0x%x] "
896 ss
, j
, numQSorted
, hi
- lo
+ 1 );
898 ptr
, block
, quadrant
, nblock
,
899 lo
, hi
, BZ_N_RADIX
, budget
901 numQSorted
+= (hi
- lo
+ 1);
902 if (*budget
< 0) return;
909 AssertH ( !bigDone
[ss
], 1006 );
913 Now scan this big bucket [ss] so as to synthesise the
914 sorted order for small buckets [t, ss] for all t,
915 including, magically, the bucket [ss,ss] too.
916 This will avoid doing Real Work in subsequent Step 1's.
919 for (j
= 0; j
<= 255; j
++) {
920 copyStart
[j
] = ftab
[(j
<< 8) + ss
] & CLEARMASK
;
921 copyEnd
[j
] = (ftab
[(j
<< 8) + ss
+ 1] & CLEARMASK
) - 1;
923 for (j
= ftab
[ss
<< 8] & CLEARMASK
; j
< copyStart
[ss
]; j
++) {
924 k
= ptr
[j
]-1; if (k
< 0) k
+= nblock
;
927 ptr
[ copyStart
[c1
]++ ] = k
;
929 for (j
= (ftab
[(ss
+1) << 8] & CLEARMASK
) - 1; j
> copyEnd
[ss
]; j
--) {
930 k
= ptr
[j
]-1; if (k
< 0) k
+= nblock
;
933 ptr
[ copyEnd
[c1
]-- ] = k
;
937 AssertH ( (copyStart
[ss
]-1 == copyEnd
[ss
])
939 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940 Necessity for this case is demonstrated by compressing
941 a sequence of approximately 48.5 million of character
942 251; 1.0.0/1.0.1 will then die here. */
943 (copyStart
[ss
] == 0 && copyEnd
[ss
] == nblock
-1),
946 for (j
= 0; j
<= 255; j
++) ftab
[(j
<< 8) + ss
] |= SETMASK
;
950 The [ss] big bucket is now done. Record this fact,
951 and update the quadrant descriptors. Remember to
952 update quadrants in the overshoot area too, if
953 necessary. The "if (i < 255)" test merely skips
954 this updating for the last bucket processed, since
955 updating for the last bucket is pointless.
957 The quadrant array provides a way to incrementally
958 cache sort orderings, as they appear, so as to
959 make subsequent comparisons in fullGtU() complete
960 faster. For repetitive blocks this makes a big
961 difference (but not big enough to be able to avoid
962 the fallback sorting mechanism, exponential radix sort).
964 The precise meaning is: at all times:
966 for 0 <= i < nblock and 0 <= j <= nblock
968 if block[i] != block[j],
970 then the relative values of quadrant[i] and
971 quadrant[j] are meaningless.
974 if quadrant[i] < quadrant[j]
975 then the string starting at i lexicographically
976 precedes the string starting at j
978 else if quadrant[i] > quadrant[j]
979 then the string starting at j lexicographically
980 precedes the string starting at i
983 the relative ordering of the strings starting
984 at i and j has not yet been determined.
990 Int32 bbStart
= ftab
[ss
<< 8] & CLEARMASK
;
991 Int32 bbSize
= (ftab
[(ss
+1) << 8] & CLEARMASK
) - bbStart
;
994 while ((bbSize
>> shifts
) > 65534) shifts
++;
996 for (j
= bbSize
-1; j
>= 0; j
--) {
997 Int32 a2update
= ptr
[bbStart
+ j
];
998 UInt16 qVal
= (UInt16
)(j
>> shifts
);
999 quadrant
[a2update
] = qVal
;
1000 if (a2update
< BZ_N_OVERSHOOT
)
1001 quadrant
[a2update
+ nblock
] = qVal
;
1003 AssertH ( ((bbSize
-1) >> shifts
) <= 65535, 1002 );
1009 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010 nblock
, numQSorted
, nblock
- numQSorted
);
1018 /*---------------------------------------------*/
1021 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022 ((UChar*)arr2) [0 .. nblock-1] holds block
1023 arr1 exists for [0 .. nblock-1]
1026 ((UChar*)arr2) [0 .. nblock-1] holds block
1027 All other areas of block destroyed
1028 ftab [ 0 .. 65536 ] destroyed
1029 arr1 [0 .. nblock-1] holds sorted order
1031 void BZ2_blockSort ( EState
* s
)
1033 UInt32
* ptr
= s
->ptr
;
1034 UChar
* block
= s
->block
;
1035 UInt32
* ftab
= s
->ftab
;
1036 Int32 nblock
= s
->nblock
;
1037 Int32 verb
= s
->verbosity
;
1038 Int32 wfact
= s
->workFactor
;
1044 if (nblock
< 10000) {
1045 fallbackSort ( s
->arr1
, s
->arr2
, ftab
, nblock
, verb
);
1047 /* Calculate the location for quadrant, remembering to get
1048 the alignment right. Assumes that &(block[0]) is at least
1049 2-byte aligned -- this should be ok since block is really
1050 the first section of arr2.
1052 i
= nblock
+BZ_N_OVERSHOOT
;
1054 quadrant
= (UInt16
*)(&(block
[i
]));
1056 /* (wfact-1) / 3 puts the default-factor-30
1057 transition point at very roughly the same place as
1058 with v0.1 and v0.9.0.
1059 Not that it particularly matters any more, since the
1060 resulting compressed stream is now the same regardless
1061 of whether or not we use the main sort or fallback sort.
1063 if (wfact
< 1 ) wfact
= 1;
1064 if (wfact
> 100) wfact
= 100;
1065 budgetInit
= nblock
* ((wfact
-1) / 3);
1066 budget
= budgetInit
;
1068 mainSort ( ptr
, block
, quadrant
, ftab
, nblock
, verb
, &budget
);
1070 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071 budgetInit
- budget
,
1073 (float)(budgetInit
- budget
) /
1074 (float)(nblock
==0 ? 1 : nblock
) );
1077 VPrintf0 ( " too repetitive; using fallback"
1078 " sorting algorithm\n" );
1079 fallbackSort ( s
->arr1
, s
->arr2
, ftab
, nblock
, verb
);
1084 for (i
= 0; i
< s
->nblock
; i
++)
1086 { s
->origPtr
= i
; break; };
1088 AssertH( s
->origPtr
!= -1, 1003 );
1092 /*-------------------------------------------------------------*/
1093 /*--- end blocksort.c ---*/
1094 /*-------------------------------------------------------------*/