xref: /oneTBB/examples/graph/fgbzip2/blocksort.cpp (revision b15aabb3)
1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev 
4d86ed7fbStbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev     you may not use this file except in compliance with the License.
6d86ed7fbStbbdev     You may obtain a copy of the License at
7d86ed7fbStbbdev 
8d86ed7fbStbbdev         http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev 
10d86ed7fbStbbdev     Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev     See the License for the specific language governing permissions and
14d86ed7fbStbbdev     limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev 
17d86ed7fbStbbdev /*-------------------------------------------------------------*/
18d86ed7fbStbbdev /*--- Block sorting machinery                               ---*/
19d86ed7fbStbbdev /*---                                         blocksort.cpp ---*/
20d86ed7fbStbbdev /*-------------------------------------------------------------*/
21d86ed7fbStbbdev 
22d86ed7fbStbbdev /* ------------------------------------------------------------------
23d86ed7fbStbbdev     The original source for this example:
24d86ed7fbStbbdev     This file is part of bzip2/libbzip2, a program and library for
25d86ed7fbStbbdev     lossless, block-sorting data compression.
26d86ed7fbStbbdev 
27d86ed7fbStbbdev     bzip2/libbzip2 version 1.0.6 of 6 September 2010
28d86ed7fbStbbdev     Copyright (C) 1996-2010 Julian Seward <[email protected]>
29d86ed7fbStbbdev 
30d86ed7fbStbbdev     This program, "bzip2", the associated library "libbzip2", and all
31d86ed7fbStbbdev     documentation, are copyright (C) 1996-2010 Julian R Seward.  All
32d86ed7fbStbbdev     rights reserved.
33d86ed7fbStbbdev 
34d86ed7fbStbbdev     Redistribution and use in source and binary forms, with or without
35d86ed7fbStbbdev     modification, are permitted provided that the following conditions
36d86ed7fbStbbdev     are met:
37d86ed7fbStbbdev 
38d86ed7fbStbbdev     1. Redistributions of source code must retain the above copyright
39d86ed7fbStbbdev     notice, this list of conditions and the following disclaimer.
40d86ed7fbStbbdev 
41d86ed7fbStbbdev     2. The origin of this software must not be misrepresented; you must
42d86ed7fbStbbdev     not claim that you wrote the original software.  If you use this
43d86ed7fbStbbdev     software in a product, an acknowledgment in the product
44d86ed7fbStbbdev     documentation would be appreciated but is not required.
45d86ed7fbStbbdev 
46d86ed7fbStbbdev     3. Altered source versions must be plainly marked as such, and must
47d86ed7fbStbbdev     not be misrepresented as being the original software.
48d86ed7fbStbbdev 
49d86ed7fbStbbdev     4. The name of the author may not be used to endorse or promote
50d86ed7fbStbbdev     products derived from this software without specific prior written
51d86ed7fbStbbdev     permission.
52d86ed7fbStbbdev 
53d86ed7fbStbbdev     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
54d86ed7fbStbbdev     OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55d86ed7fbStbbdev     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56d86ed7fbStbbdev     ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
57d86ed7fbStbbdev     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58d86ed7fbStbbdev     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
59d86ed7fbStbbdev     GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60d86ed7fbStbbdev     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61d86ed7fbStbbdev     WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62d86ed7fbStbbdev     NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63d86ed7fbStbbdev     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64d86ed7fbStbbdev 
65d86ed7fbStbbdev     Julian Seward, [email protected]
66d86ed7fbStbbdev     bzip2/libbzip2 version 1.0.6 of 6 September 2010
67d86ed7fbStbbdev    ------------------------------------------------------------------ */
68d86ed7fbStbbdev 
69d86ed7fbStbbdev #include "bzlib_private.hpp"
70d86ed7fbStbbdev 
71d86ed7fbStbbdev /*---------------------------------------------*/
72d86ed7fbStbbdev /*--- Fallback O(N log(N)^2) sorting        ---*/
73d86ed7fbStbbdev /*--- algorithm, for repetitive blocks      ---*/
74d86ed7fbStbbdev /*---------------------------------------------*/
75d86ed7fbStbbdev 
76d86ed7fbStbbdev /*---------------------------------------------*/
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)77d86ed7fbStbbdev static __inline__ void fallbackSimpleSort(UInt32* fmap, UInt32* eclass, Int32 lo, Int32 hi) {
78d86ed7fbStbbdev     Int32 i, j, tmp;
79d86ed7fbStbbdev     UInt32 ec_tmp;
80d86ed7fbStbbdev 
81d86ed7fbStbbdev     if (lo == hi)
82d86ed7fbStbbdev         return;
83d86ed7fbStbbdev 
84d86ed7fbStbbdev     if (hi - lo > 3) {
85d86ed7fbStbbdev         for (i = hi - 4; i >= lo; i--) {
86d86ed7fbStbbdev             tmp = fmap[i];
87d86ed7fbStbbdev             ec_tmp = eclass[tmp];
88d86ed7fbStbbdev             for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
89d86ed7fbStbbdev                 fmap[j - 4] = fmap[j];
90d86ed7fbStbbdev             fmap[j - 4] = tmp;
91d86ed7fbStbbdev         }
92d86ed7fbStbbdev     }
93d86ed7fbStbbdev 
94d86ed7fbStbbdev     for (i = hi - 1; i >= lo; i--) {
95d86ed7fbStbbdev         tmp = fmap[i];
96d86ed7fbStbbdev         ec_tmp = eclass[tmp];
97d86ed7fbStbbdev         for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
98d86ed7fbStbbdev             fmap[j - 1] = fmap[j];
99d86ed7fbStbbdev         fmap[j - 1] = tmp;
100d86ed7fbStbbdev     }
101d86ed7fbStbbdev }
102d86ed7fbStbbdev 
103d86ed7fbStbbdev /*---------------------------------------------*/
104d86ed7fbStbbdev #define fswap(zz1, zz2)    \
105d86ed7fbStbbdev     {                      \
106d86ed7fbStbbdev         Int32 zztmp = zz1; \
107d86ed7fbStbbdev         zz1 = zz2;         \
108d86ed7fbStbbdev         zz2 = zztmp;       \
109d86ed7fbStbbdev     }
110d86ed7fbStbbdev 
111d86ed7fbStbbdev #define fvswap(zzp1, zzp2, zzn)            \
112d86ed7fbStbbdev     {                                      \
113d86ed7fbStbbdev         Int32 yyp1 = (zzp1);               \
114d86ed7fbStbbdev         Int32 yyp2 = (zzp2);               \
115d86ed7fbStbbdev         Int32 yyn = (zzn);                 \
116d86ed7fbStbbdev         while (yyn > 0) {                  \
117d86ed7fbStbbdev             fswap(fmap[yyp1], fmap[yyp2]); \
118d86ed7fbStbbdev             yyp1++;                        \
119d86ed7fbStbbdev             yyp2++;                        \
120d86ed7fbStbbdev             yyn--;                         \
121d86ed7fbStbbdev         }                                  \
122d86ed7fbStbbdev     }
123d86ed7fbStbbdev 
124d86ed7fbStbbdev #define fmin(a, b) ((a) < (b)) ? (a) : (b)
125d86ed7fbStbbdev 
126d86ed7fbStbbdev #define fpush(lz, hz)     \
127d86ed7fbStbbdev     {                     \
128d86ed7fbStbbdev         stackLo[sp] = lz; \
129d86ed7fbStbbdev         stackHi[sp] = hz; \
130d86ed7fbStbbdev         sp++;             \
131d86ed7fbStbbdev     }
132d86ed7fbStbbdev 
133d86ed7fbStbbdev #define fpop(lz, hz)      \
134d86ed7fbStbbdev     {                     \
135d86ed7fbStbbdev         sp--;             \
136d86ed7fbStbbdev         lz = stackLo[sp]; \
137d86ed7fbStbbdev         hz = stackHi[sp]; \
138d86ed7fbStbbdev     }
139d86ed7fbStbbdev 
140d86ed7fbStbbdev #define FALLBACK_QSORT_SMALL_THRESH 10
141d86ed7fbStbbdev #define FALLBACK_QSORT_STACK_SIZE   100
142d86ed7fbStbbdev 
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)143d86ed7fbStbbdev static void fallbackQSort3(UInt32* fmap, UInt32* eclass, Int32 loSt, Int32 hiSt) {
144d86ed7fbStbbdev     Int32 unLo, unHi, ltLo, gtHi, n, m;
145d86ed7fbStbbdev     Int32 sp, lo, hi;
146d86ed7fbStbbdev     UInt32 med, r, r3;
147d86ed7fbStbbdev     Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
148d86ed7fbStbbdev     Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
149d86ed7fbStbbdev 
150d86ed7fbStbbdev     r = 0;
151d86ed7fbStbbdev 
152d86ed7fbStbbdev     sp = 0;
153d86ed7fbStbbdev     fpush(loSt, hiSt);
154d86ed7fbStbbdev 
155d86ed7fbStbbdev     while (sp > 0) {
156d86ed7fbStbbdev         AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
157d86ed7fbStbbdev 
158d86ed7fbStbbdev         fpop(lo, hi);
159d86ed7fbStbbdev         if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
160d86ed7fbStbbdev             fallbackSimpleSort(fmap, eclass, lo, hi);
161d86ed7fbStbbdev             continue;
162d86ed7fbStbbdev         }
163d86ed7fbStbbdev 
164d86ed7fbStbbdev         /* Random partitioning.  Median of 3 sometimes fails to
165d86ed7fbStbbdev          avoid bad cases.  Median of 9 seems to help but
166d86ed7fbStbbdev          looks rather expensive.  This too seems to work but
167d86ed7fbStbbdev          is cheaper.  Guidance for the magic constants
168d86ed7fbStbbdev          7621 and 32768 is taken from Sedgewick's algorithms
169d86ed7fbStbbdev          book, chapter 35.
170d86ed7fbStbbdev       */
171d86ed7fbStbbdev         r = ((r * 7621) + 1) % 32768;
172d86ed7fbStbbdev         r3 = r % 3;
173d86ed7fbStbbdev         if (r3 == 0)
174d86ed7fbStbbdev             med = eclass[fmap[lo]];
175d86ed7fbStbbdev         else if (r3 == 1)
176d86ed7fbStbbdev             med = eclass[fmap[(lo + hi) >> 1]];
177d86ed7fbStbbdev         else
178d86ed7fbStbbdev             med = eclass[fmap[hi]];
179d86ed7fbStbbdev 
180d86ed7fbStbbdev         unLo = ltLo = lo;
181d86ed7fbStbbdev         unHi = gtHi = hi;
182d86ed7fbStbbdev 
183d86ed7fbStbbdev         while (1) {
184d86ed7fbStbbdev             while (1) {
185d86ed7fbStbbdev                 if (unLo > unHi)
186d86ed7fbStbbdev                     break;
187d86ed7fbStbbdev                 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
188d86ed7fbStbbdev                 if (n == 0) {
189d86ed7fbStbbdev                     fswap(fmap[unLo], fmap[ltLo]);
190d86ed7fbStbbdev                     ltLo++;
191d86ed7fbStbbdev                     unLo++;
192d86ed7fbStbbdev                     continue;
193d86ed7fbStbbdev                 };
194d86ed7fbStbbdev                 if (n > 0)
195d86ed7fbStbbdev                     break;
196d86ed7fbStbbdev                 unLo++;
197d86ed7fbStbbdev             }
198d86ed7fbStbbdev             while (1) {
199d86ed7fbStbbdev                 if (unLo > unHi)
200d86ed7fbStbbdev                     break;
201d86ed7fbStbbdev                 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
202d86ed7fbStbbdev                 if (n == 0) {
203d86ed7fbStbbdev                     fswap(fmap[unHi], fmap[gtHi]);
204d86ed7fbStbbdev                     gtHi--;
205d86ed7fbStbbdev                     unHi--;
206d86ed7fbStbbdev                     continue;
207d86ed7fbStbbdev                 };
208d86ed7fbStbbdev                 if (n < 0)
209d86ed7fbStbbdev                     break;
210d86ed7fbStbbdev                 unHi--;
211d86ed7fbStbbdev             }
212d86ed7fbStbbdev             if (unLo > unHi)
213d86ed7fbStbbdev                 break;
214d86ed7fbStbbdev             fswap(fmap[unLo], fmap[unHi]);
215d86ed7fbStbbdev             unLo++;
216d86ed7fbStbbdev             unHi--;
217d86ed7fbStbbdev         }
218d86ed7fbStbbdev 
219d86ed7fbStbbdev         AssertD(unHi == unLo - 1, "fallbackQSort3(2)");
220d86ed7fbStbbdev 
221d86ed7fbStbbdev         if (gtHi < ltLo)
222d86ed7fbStbbdev             continue;
223d86ed7fbStbbdev 
224d86ed7fbStbbdev         n = fmin(ltLo - lo, unLo - ltLo);
225d86ed7fbStbbdev         fvswap(lo, unLo - n, n);
226d86ed7fbStbbdev         m = fmin(hi - gtHi, gtHi - unHi);
227d86ed7fbStbbdev         fvswap(unLo, hi - m + 1, m);
228d86ed7fbStbbdev 
229d86ed7fbStbbdev         n = lo + unLo - ltLo - 1;
230d86ed7fbStbbdev         m = hi - (gtHi - unHi) + 1;
231d86ed7fbStbbdev 
232d86ed7fbStbbdev         if (n - lo > hi - m) {
233d86ed7fbStbbdev             fpush(lo, n);
234d86ed7fbStbbdev             fpush(m, hi);
235d86ed7fbStbbdev         }
236d86ed7fbStbbdev         else {
237d86ed7fbStbbdev             fpush(m, hi);
238d86ed7fbStbbdev             fpush(lo, n);
239d86ed7fbStbbdev         }
240d86ed7fbStbbdev     }
241d86ed7fbStbbdev }
242d86ed7fbStbbdev 
243d86ed7fbStbbdev #undef fmin
244d86ed7fbStbbdev #undef fpush
245d86ed7fbStbbdev #undef fpop
246d86ed7fbStbbdev #undef fswap
247d86ed7fbStbbdev #undef fvswap
248d86ed7fbStbbdev #undef FALLBACK_QSORT_SMALL_THRESH
249d86ed7fbStbbdev #undef FALLBACK_QSORT_STACK_SIZE
250d86ed7fbStbbdev 
251d86ed7fbStbbdev /*---------------------------------------------*/
252d86ed7fbStbbdev /* Pre:
253d86ed7fbStbbdev       nblock > 0
254d86ed7fbStbbdev       eclass exists for [0 .. nblock-1]
255d86ed7fbStbbdev       ((UChar*)eclass) [0 .. nblock-1] holds block
256d86ed7fbStbbdev       ptr exists for [0 .. nblock-1]
257d86ed7fbStbbdev 
258d86ed7fbStbbdev    Post:
259d86ed7fbStbbdev       ((UChar*)eclass) [0 .. nblock-1] holds block
260d86ed7fbStbbdev       All other areas of eclass destroyed
261d86ed7fbStbbdev       fmap [0 .. nblock-1] holds sorted order
262d86ed7fbStbbdev       bhtab [ 0 .. 2+(nblock/32) ] destroyed
263d86ed7fbStbbdev */
264d86ed7fbStbbdev 
265d86ed7fbStbbdev #define SET_BH(zz)       bhtab[(zz) >> 5] |= (1 << ((zz)&31))
266d86ed7fbStbbdev #define CLEAR_BH(zz)     bhtab[(zz) >> 5] &= ~(1 << ((zz)&31))
267d86ed7fbStbbdev #define ISSET_BH(zz)     (bhtab[(zz) >> 5] & (1 << ((zz)&31)))
268d86ed7fbStbbdev #define WORD_BH(zz)      bhtab[(zz) >> 5]
269d86ed7fbStbbdev #define UNALIGNED_BH(zz) ((zz)&0x01f)
270d86ed7fbStbbdev 
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)271d86ed7fbStbbdev static void fallbackSort(UInt32* fmap, UInt32* eclass, UInt32* bhtab, Int32 nblock, Int32 verb) {
272d86ed7fbStbbdev     Int32 ftab[257];
273d86ed7fbStbbdev     Int32 ftabCopy[256];
274d86ed7fbStbbdev     Int32 H, i, j, k, l, r, cc, cc1;
275d86ed7fbStbbdev     Int32 nNotDone;
276d86ed7fbStbbdev     Int32 nBhtab;
277d86ed7fbStbbdev     UChar* eclass8 = (UChar*)eclass;
278d86ed7fbStbbdev 
279d86ed7fbStbbdev     /*--
280d86ed7fbStbbdev       Initial 1-char radix sort to generate
281d86ed7fbStbbdev       initial fmap and initial BH bits.
282d86ed7fbStbbdev    --*/
283d86ed7fbStbbdev     if (verb >= 4)
284d86ed7fbStbbdev         VPrintf0("        bucket sorting ...\n");
285d86ed7fbStbbdev     for (i = 0; i < 257; i++)
286d86ed7fbStbbdev         ftab[i] = 0;
287d86ed7fbStbbdev     for (i = 0; i < nblock; i++)
288d86ed7fbStbbdev         ftab[eclass8[i]]++;
289d86ed7fbStbbdev     for (i = 0; i < 256; i++)
290d86ed7fbStbbdev         ftabCopy[i] = ftab[i];
291d86ed7fbStbbdev     for (i = 1; i < 257; i++)
292d86ed7fbStbbdev         ftab[i] += ftab[i - 1];
293d86ed7fbStbbdev 
294d86ed7fbStbbdev     for (i = 0; i < nblock; i++) {
295d86ed7fbStbbdev         j = eclass8[i];
296d86ed7fbStbbdev         k = ftab[j] - 1;
297d86ed7fbStbbdev         ftab[j] = k;
298d86ed7fbStbbdev         fmap[k] = i;
299d86ed7fbStbbdev     }
300d86ed7fbStbbdev 
301d86ed7fbStbbdev     nBhtab = 2 + (nblock / 32);
302d86ed7fbStbbdev     for (i = 0; i < nBhtab; i++)
303d86ed7fbStbbdev         bhtab[i] = 0;
304d86ed7fbStbbdev     for (i = 0; i < 256; i++)
305d86ed7fbStbbdev         SET_BH(ftab[i]);
306d86ed7fbStbbdev 
307d86ed7fbStbbdev     /*--
308d86ed7fbStbbdev       Inductively refine the buckets.  Kind-of an
309d86ed7fbStbbdev       "exponential radix sort" (!), inspired by the
310d86ed7fbStbbdev       Manber-Myers suffix array construction algorithm.
311d86ed7fbStbbdev    --*/
312d86ed7fbStbbdev 
313d86ed7fbStbbdev     /*-- set sentinel bits for block-end detection --*/
314d86ed7fbStbbdev     for (i = 0; i < 32; i++) {
315d86ed7fbStbbdev         SET_BH(nblock + 2 * i);
316d86ed7fbStbbdev         CLEAR_BH(nblock + 2 * i + 1);
317d86ed7fbStbbdev     }
318d86ed7fbStbbdev 
319d86ed7fbStbbdev     /*-- the log(N) loop --*/
320d86ed7fbStbbdev     H = 1;
321d86ed7fbStbbdev     while (1) {
322d86ed7fbStbbdev         if (verb >= 4)
323d86ed7fbStbbdev             VPrintf1("        depth %6d has ", H);
324d86ed7fbStbbdev 
325d86ed7fbStbbdev         j = 0;
326d86ed7fbStbbdev         for (i = 0; i < nblock; i++) {
327d86ed7fbStbbdev             if (ISSET_BH(i))
328d86ed7fbStbbdev                 j = i;
329d86ed7fbStbbdev             k = fmap[i] - H;
330d86ed7fbStbbdev             if (k < 0)
331d86ed7fbStbbdev                 k += nblock;
332d86ed7fbStbbdev             eclass[k] = j;
333d86ed7fbStbbdev         }
334d86ed7fbStbbdev 
335d86ed7fbStbbdev         nNotDone = 0;
336d86ed7fbStbbdev         r = -1;
337d86ed7fbStbbdev         while (1) {
338d86ed7fbStbbdev             /*-- find the next non-singleton bucket --*/
339d86ed7fbStbbdev             k = r + 1;
340d86ed7fbStbbdev             while (ISSET_BH(k) && UNALIGNED_BH(k))
341d86ed7fbStbbdev                 k++;
342d86ed7fbStbbdev             if (ISSET_BH(k)) {
343d86ed7fbStbbdev                 while (WORD_BH(k) == 0xffffffff)
344d86ed7fbStbbdev                     k += 32;
345d86ed7fbStbbdev                 while (ISSET_BH(k))
346d86ed7fbStbbdev                     k++;
347d86ed7fbStbbdev             }
348d86ed7fbStbbdev             l = k - 1;
349d86ed7fbStbbdev             if (l >= nblock)
350d86ed7fbStbbdev                 break;
351d86ed7fbStbbdev             while (!ISSET_BH(k) && UNALIGNED_BH(k))
352d86ed7fbStbbdev                 k++;
353d86ed7fbStbbdev             if (!ISSET_BH(k)) {
354d86ed7fbStbbdev                 while (WORD_BH(k) == 0x00000000)
355d86ed7fbStbbdev                     k += 32;
356d86ed7fbStbbdev                 while (!ISSET_BH(k))
357d86ed7fbStbbdev                     k++;
358d86ed7fbStbbdev             }
359d86ed7fbStbbdev             r = k - 1;
360d86ed7fbStbbdev             if (r >= nblock)
361d86ed7fbStbbdev                 break;
362d86ed7fbStbbdev 
363d86ed7fbStbbdev             /*-- now [l, r] bracket current bucket --*/
364d86ed7fbStbbdev             if (r > l) {
365d86ed7fbStbbdev                 nNotDone += (r - l + 1);
366d86ed7fbStbbdev                 fallbackQSort3(fmap, eclass, l, r);
367d86ed7fbStbbdev 
368d86ed7fbStbbdev                 /*-- scan bucket and generate header bits-- */
369d86ed7fbStbbdev                 cc = -1;
370d86ed7fbStbbdev                 for (i = l; i <= r; i++) {
371d86ed7fbStbbdev                     cc1 = eclass[fmap[i]];
372d86ed7fbStbbdev                     if (cc != cc1) {
373d86ed7fbStbbdev                         SET_BH(i);
374d86ed7fbStbbdev                         cc = cc1;
375d86ed7fbStbbdev                     };
376d86ed7fbStbbdev                 }
377d86ed7fbStbbdev             }
378d86ed7fbStbbdev         }
379d86ed7fbStbbdev 
380d86ed7fbStbbdev         if (verb >= 4)
381d86ed7fbStbbdev             VPrintf1("%6d unresolved strings\n", nNotDone);
382d86ed7fbStbbdev 
383d86ed7fbStbbdev         H *= 2;
384d86ed7fbStbbdev         if (H > nblock || nNotDone == 0)
385d86ed7fbStbbdev             break;
386d86ed7fbStbbdev     }
387d86ed7fbStbbdev 
388d86ed7fbStbbdev     /*--
389d86ed7fbStbbdev       Reconstruct the original block in
390d86ed7fbStbbdev       eclass8 [0 .. nblock-1], since the
391d86ed7fbStbbdev       previous phase destroyed it.
392d86ed7fbStbbdev    --*/
393d86ed7fbStbbdev     if (verb >= 4)
394d86ed7fbStbbdev         VPrintf0("        reconstructing block ...\n");
395d86ed7fbStbbdev     j = 0;
396d86ed7fbStbbdev     for (i = 0; i < nblock; i++) {
397d86ed7fbStbbdev         while (ftabCopy[j] == 0)
398d86ed7fbStbbdev             j++;
399d86ed7fbStbbdev         ftabCopy[j]--;
400d86ed7fbStbbdev         eclass8[fmap[i]] = (UChar)j;
401d86ed7fbStbbdev     }
402d86ed7fbStbbdev     AssertH(j < 256, 1005);
403d86ed7fbStbbdev }
404d86ed7fbStbbdev 
405d86ed7fbStbbdev #undef SET_BH
406d86ed7fbStbbdev #undef CLEAR_BH
407d86ed7fbStbbdev #undef ISSET_BH
408d86ed7fbStbbdev #undef WORD_BH
409d86ed7fbStbbdev #undef UNALIGNED_BH
410d86ed7fbStbbdev 
411d86ed7fbStbbdev /*---------------------------------------------*/
412d86ed7fbStbbdev /*--- The main, O(N^2 log(N)) sorting       ---*/
413d86ed7fbStbbdev /*--- algorithm.  Faster for "normal"       ---*/
414d86ed7fbStbbdev /*--- non-repetitive blocks.                ---*/
415d86ed7fbStbbdev /*---------------------------------------------*/
416d86ed7fbStbbdev 
417d86ed7fbStbbdev /*---------------------------------------------*/
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)418d86ed7fbStbbdev static __inline__ Bool mainGtU(UInt32 i1,
419d86ed7fbStbbdev                                UInt32 i2,
420d86ed7fbStbbdev                                UChar* block,
421d86ed7fbStbbdev                                UInt16* quadrant,
422d86ed7fbStbbdev                                UInt32 nblock,
423d86ed7fbStbbdev                                Int32* budget) {
424d86ed7fbStbbdev     Int32 k;
425d86ed7fbStbbdev     UChar c1, c2;
426d86ed7fbStbbdev     UInt16 s1, s2;
427d86ed7fbStbbdev 
428d86ed7fbStbbdev     AssertD(i1 != i2, "mainGtU");
429d86ed7fbStbbdev     /* 1 */
430d86ed7fbStbbdev     c1 = block[i1];
431d86ed7fbStbbdev     c2 = block[i2];
432d86ed7fbStbbdev     if (c1 != c2)
433d86ed7fbStbbdev         return (c1 > c2);
434d86ed7fbStbbdev     i1++;
435d86ed7fbStbbdev     i2++;
436d86ed7fbStbbdev     /* 2 */
437d86ed7fbStbbdev     c1 = block[i1];
438d86ed7fbStbbdev     c2 = block[i2];
439d86ed7fbStbbdev     if (c1 != c2)
440d86ed7fbStbbdev         return (c1 > c2);
441d86ed7fbStbbdev     i1++;
442d86ed7fbStbbdev     i2++;
443d86ed7fbStbbdev     /* 3 */
444d86ed7fbStbbdev     c1 = block[i1];
445d86ed7fbStbbdev     c2 = block[i2];
446d86ed7fbStbbdev     if (c1 != c2)
447d86ed7fbStbbdev         return (c1 > c2);
448d86ed7fbStbbdev     i1++;
449d86ed7fbStbbdev     i2++;
450d86ed7fbStbbdev     /* 4 */
451d86ed7fbStbbdev     c1 = block[i1];
452d86ed7fbStbbdev     c2 = block[i2];
453d86ed7fbStbbdev     if (c1 != c2)
454d86ed7fbStbbdev         return (c1 > c2);
455d86ed7fbStbbdev     i1++;
456d86ed7fbStbbdev     i2++;
457d86ed7fbStbbdev     /* 5 */
458d86ed7fbStbbdev     c1 = block[i1];
459d86ed7fbStbbdev     c2 = block[i2];
460d86ed7fbStbbdev     if (c1 != c2)
461d86ed7fbStbbdev         return (c1 > c2);
462d86ed7fbStbbdev     i1++;
463d86ed7fbStbbdev     i2++;
464d86ed7fbStbbdev     /* 6 */
465d86ed7fbStbbdev     c1 = block[i1];
466d86ed7fbStbbdev     c2 = block[i2];
467d86ed7fbStbbdev     if (c1 != c2)
468d86ed7fbStbbdev         return (c1 > c2);
469d86ed7fbStbbdev     i1++;
470d86ed7fbStbbdev     i2++;
471d86ed7fbStbbdev     /* 7 */
472d86ed7fbStbbdev     c1 = block[i1];
473d86ed7fbStbbdev     c2 = block[i2];
474d86ed7fbStbbdev     if (c1 != c2)
475d86ed7fbStbbdev         return (c1 > c2);
476d86ed7fbStbbdev     i1++;
477d86ed7fbStbbdev     i2++;
478d86ed7fbStbbdev     /* 8 */
479d86ed7fbStbbdev     c1 = block[i1];
480d86ed7fbStbbdev     c2 = block[i2];
481d86ed7fbStbbdev     if (c1 != c2)
482d86ed7fbStbbdev         return (c1 > c2);
483d86ed7fbStbbdev     i1++;
484d86ed7fbStbbdev     i2++;
485d86ed7fbStbbdev     /* 9 */
486d86ed7fbStbbdev     c1 = block[i1];
487d86ed7fbStbbdev     c2 = block[i2];
488d86ed7fbStbbdev     if (c1 != c2)
489d86ed7fbStbbdev         return (c1 > c2);
490d86ed7fbStbbdev     i1++;
491d86ed7fbStbbdev     i2++;
492d86ed7fbStbbdev     /* 10 */
493d86ed7fbStbbdev     c1 = block[i1];
494d86ed7fbStbbdev     c2 = block[i2];
495d86ed7fbStbbdev     if (c1 != c2)
496d86ed7fbStbbdev         return (c1 > c2);
497d86ed7fbStbbdev     i1++;
498d86ed7fbStbbdev     i2++;
499d86ed7fbStbbdev     /* 11 */
500d86ed7fbStbbdev     c1 = block[i1];
501d86ed7fbStbbdev     c2 = block[i2];
502d86ed7fbStbbdev     if (c1 != c2)
503d86ed7fbStbbdev         return (c1 > c2);
504d86ed7fbStbbdev     i1++;
505d86ed7fbStbbdev     i2++;
506d86ed7fbStbbdev     /* 12 */
507d86ed7fbStbbdev     c1 = block[i1];
508d86ed7fbStbbdev     c2 = block[i2];
509d86ed7fbStbbdev     if (c1 != c2)
510d86ed7fbStbbdev         return (c1 > c2);
511d86ed7fbStbbdev     i1++;
512d86ed7fbStbbdev     i2++;
513d86ed7fbStbbdev 
514d86ed7fbStbbdev     k = nblock + 8;
515d86ed7fbStbbdev 
516d86ed7fbStbbdev     do {
517d86ed7fbStbbdev         /* 1 */
518d86ed7fbStbbdev         c1 = block[i1];
519d86ed7fbStbbdev         c2 = block[i2];
520d86ed7fbStbbdev         if (c1 != c2)
521d86ed7fbStbbdev             return (c1 > c2);
522d86ed7fbStbbdev         s1 = quadrant[i1];
523d86ed7fbStbbdev         s2 = quadrant[i2];
524d86ed7fbStbbdev         if (s1 != s2)
525d86ed7fbStbbdev             return (s1 > s2);
526d86ed7fbStbbdev         i1++;
527d86ed7fbStbbdev         i2++;
528d86ed7fbStbbdev         /* 2 */
529d86ed7fbStbbdev         c1 = block[i1];
530d86ed7fbStbbdev         c2 = block[i2];
531d86ed7fbStbbdev         if (c1 != c2)
532d86ed7fbStbbdev             return (c1 > c2);
533d86ed7fbStbbdev         s1 = quadrant[i1];
534d86ed7fbStbbdev         s2 = quadrant[i2];
535d86ed7fbStbbdev         if (s1 != s2)
536d86ed7fbStbbdev             return (s1 > s2);
537d86ed7fbStbbdev         i1++;
538d86ed7fbStbbdev         i2++;
539d86ed7fbStbbdev         /* 3 */
540d86ed7fbStbbdev         c1 = block[i1];
541d86ed7fbStbbdev         c2 = block[i2];
542d86ed7fbStbbdev         if (c1 != c2)
543d86ed7fbStbbdev             return (c1 > c2);
544d86ed7fbStbbdev         s1 = quadrant[i1];
545d86ed7fbStbbdev         s2 = quadrant[i2];
546d86ed7fbStbbdev         if (s1 != s2)
547d86ed7fbStbbdev             return (s1 > s2);
548d86ed7fbStbbdev         i1++;
549d86ed7fbStbbdev         i2++;
550d86ed7fbStbbdev         /* 4 */
551d86ed7fbStbbdev         c1 = block[i1];
552d86ed7fbStbbdev         c2 = block[i2];
553d86ed7fbStbbdev         if (c1 != c2)
554d86ed7fbStbbdev             return (c1 > c2);
555d86ed7fbStbbdev         s1 = quadrant[i1];
556d86ed7fbStbbdev         s2 = quadrant[i2];
557d86ed7fbStbbdev         if (s1 != s2)
558d86ed7fbStbbdev             return (s1 > s2);
559d86ed7fbStbbdev         i1++;
560d86ed7fbStbbdev         i2++;
561d86ed7fbStbbdev         /* 5 */
562d86ed7fbStbbdev         c1 = block[i1];
563d86ed7fbStbbdev         c2 = block[i2];
564d86ed7fbStbbdev         if (c1 != c2)
565d86ed7fbStbbdev             return (c1 > c2);
566d86ed7fbStbbdev         s1 = quadrant[i1];
567d86ed7fbStbbdev         s2 = quadrant[i2];
568d86ed7fbStbbdev         if (s1 != s2)
569d86ed7fbStbbdev             return (s1 > s2);
570d86ed7fbStbbdev         i1++;
571d86ed7fbStbbdev         i2++;
572d86ed7fbStbbdev         /* 6 */
573d86ed7fbStbbdev         c1 = block[i1];
574d86ed7fbStbbdev         c2 = block[i2];
575d86ed7fbStbbdev         if (c1 != c2)
576d86ed7fbStbbdev             return (c1 > c2);
577d86ed7fbStbbdev         s1 = quadrant[i1];
578d86ed7fbStbbdev         s2 = quadrant[i2];
579d86ed7fbStbbdev         if (s1 != s2)
580d86ed7fbStbbdev             return (s1 > s2);
581d86ed7fbStbbdev         i1++;
582d86ed7fbStbbdev         i2++;
583d86ed7fbStbbdev         /* 7 */
584d86ed7fbStbbdev         c1 = block[i1];
585d86ed7fbStbbdev         c2 = block[i2];
586d86ed7fbStbbdev         if (c1 != c2)
587d86ed7fbStbbdev             return (c1 > c2);
588d86ed7fbStbbdev         s1 = quadrant[i1];
589d86ed7fbStbbdev         s2 = quadrant[i2];
590d86ed7fbStbbdev         if (s1 != s2)
591d86ed7fbStbbdev             return (s1 > s2);
592d86ed7fbStbbdev         i1++;
593d86ed7fbStbbdev         i2++;
594d86ed7fbStbbdev         /* 8 */
595d86ed7fbStbbdev         c1 = block[i1];
596d86ed7fbStbbdev         c2 = block[i2];
597d86ed7fbStbbdev         if (c1 != c2)
598d86ed7fbStbbdev             return (c1 > c2);
599d86ed7fbStbbdev         s1 = quadrant[i1];
600d86ed7fbStbbdev         s2 = quadrant[i2];
601d86ed7fbStbbdev         if (s1 != s2)
602d86ed7fbStbbdev             return (s1 > s2);
603d86ed7fbStbbdev         i1++;
604d86ed7fbStbbdev         i2++;
605d86ed7fbStbbdev 
606d86ed7fbStbbdev         if (i1 >= nblock)
607d86ed7fbStbbdev             i1 -= nblock;
608d86ed7fbStbbdev         if (i2 >= nblock)
609d86ed7fbStbbdev             i2 -= nblock;
610d86ed7fbStbbdev 
611d86ed7fbStbbdev         k -= 8;
612d86ed7fbStbbdev         (*budget)--;
613d86ed7fbStbbdev     } while (k >= 0);
614d86ed7fbStbbdev 
615d86ed7fbStbbdev     return False;
616d86ed7fbStbbdev }
617d86ed7fbStbbdev 
618d86ed7fbStbbdev /*---------------------------------------------*/
619d86ed7fbStbbdev /*--
620d86ed7fbStbbdev    Knuth's increments seem to work better
621d86ed7fbStbbdev    than Incerpi-Sedgewick here.  Possibly
622d86ed7fbStbbdev    because the number of elements to sort
623d86ed7fbStbbdev    is usually small, typically <= 20.
624d86ed7fbStbbdev --*/
625d86ed7fbStbbdev static Int32 incs[14] = { 1,    4,    13,    40,    121,    364,    1093,
626d86ed7fbStbbdev                           3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
627d86ed7fbStbbdev 
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)628d86ed7fbStbbdev static void mainSimpleSort(UInt32* ptr,
629d86ed7fbStbbdev                            UChar* block,
630d86ed7fbStbbdev                            UInt16* quadrant,
631d86ed7fbStbbdev                            Int32 nblock,
632d86ed7fbStbbdev                            Int32 lo,
633d86ed7fbStbbdev                            Int32 hi,
634d86ed7fbStbbdev                            Int32 d,
635d86ed7fbStbbdev                            Int32* budget) {
636d86ed7fbStbbdev     Int32 i, j, h, bigN, hp;
637d86ed7fbStbbdev     UInt32 v;
638d86ed7fbStbbdev 
639d86ed7fbStbbdev     bigN = hi - lo + 1;
640d86ed7fbStbbdev     if (bigN < 2)
641d86ed7fbStbbdev         return;
642d86ed7fbStbbdev 
643d86ed7fbStbbdev     hp = 0;
644d86ed7fbStbbdev     while (incs[hp] < bigN)
645d86ed7fbStbbdev         hp++;
646d86ed7fbStbbdev     hp--;
647d86ed7fbStbbdev 
648d86ed7fbStbbdev     for (; hp >= 0; hp--) {
649d86ed7fbStbbdev         h = incs[hp];
650d86ed7fbStbbdev 
651d86ed7fbStbbdev         i = lo + h;
652d86ed7fbStbbdev         while (True) {
653d86ed7fbStbbdev             /*-- copy 1 --*/
654d86ed7fbStbbdev             if (i > hi)
655d86ed7fbStbbdev                 break;
656d86ed7fbStbbdev             v = ptr[i];
657d86ed7fbStbbdev             j = i;
658d86ed7fbStbbdev             while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
659d86ed7fbStbbdev                 ptr[j] = ptr[j - h];
660d86ed7fbStbbdev                 j = j - h;
661d86ed7fbStbbdev                 if (j <= (lo + h - 1))
662d86ed7fbStbbdev                     break;
663d86ed7fbStbbdev             }
664d86ed7fbStbbdev             ptr[j] = v;
665d86ed7fbStbbdev             i++;
666d86ed7fbStbbdev 
667d86ed7fbStbbdev             /*-- copy 2 --*/
668d86ed7fbStbbdev             if (i > hi)
669d86ed7fbStbbdev                 break;
670d86ed7fbStbbdev             v = ptr[i];
671d86ed7fbStbbdev             j = i;
672d86ed7fbStbbdev             while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
673d86ed7fbStbbdev                 ptr[j] = ptr[j - h];
674d86ed7fbStbbdev                 j = j - h;
675d86ed7fbStbbdev                 if (j <= (lo + h - 1))
676d86ed7fbStbbdev                     break;
677d86ed7fbStbbdev             }
678d86ed7fbStbbdev             ptr[j] = v;
679d86ed7fbStbbdev             i++;
680d86ed7fbStbbdev 
681d86ed7fbStbbdev             /*-- copy 3 --*/
682d86ed7fbStbbdev             if (i > hi)
683d86ed7fbStbbdev                 break;
684d86ed7fbStbbdev             v = ptr[i];
685d86ed7fbStbbdev             j = i;
686d86ed7fbStbbdev             while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
687d86ed7fbStbbdev                 ptr[j] = ptr[j - h];
688d86ed7fbStbbdev                 j = j - h;
689d86ed7fbStbbdev                 if (j <= (lo + h - 1))
690d86ed7fbStbbdev                     break;
691d86ed7fbStbbdev             }
692d86ed7fbStbbdev             ptr[j] = v;
693d86ed7fbStbbdev             i++;
694d86ed7fbStbbdev 
695d86ed7fbStbbdev             if (*budget < 0)
696d86ed7fbStbbdev                 return;
697d86ed7fbStbbdev         }
698d86ed7fbStbbdev     }
699d86ed7fbStbbdev }
700d86ed7fbStbbdev 
701d86ed7fbStbbdev /*---------------------------------------------*/
702d86ed7fbStbbdev /*--
703d86ed7fbStbbdev    The following is an implementation of
704d86ed7fbStbbdev    an elegant 3-way quicksort for strings,
705d86ed7fbStbbdev    described in a paper "Fast Algorithms for
706d86ed7fbStbbdev    Sorting and Searching Strings", by Robert
707d86ed7fbStbbdev    Sedgewick and Jon L. Bentley.
708d86ed7fbStbbdev --*/
709d86ed7fbStbbdev 
710d86ed7fbStbbdev #define mswap(zz1, zz2)    \
711d86ed7fbStbbdev     {                      \
712d86ed7fbStbbdev         Int32 zztmp = zz1; \
713d86ed7fbStbbdev         zz1 = zz2;         \
714d86ed7fbStbbdev         zz2 = zztmp;       \
715d86ed7fbStbbdev     }
716d86ed7fbStbbdev 
717d86ed7fbStbbdev #define mvswap(zzp1, zzp2, zzn)          \
718d86ed7fbStbbdev     {                                    \
719d86ed7fbStbbdev         Int32 yyp1 = (zzp1);             \
720d86ed7fbStbbdev         Int32 yyp2 = (zzp2);             \
721d86ed7fbStbbdev         Int32 yyn = (zzn);               \
722d86ed7fbStbbdev         while (yyn > 0) {                \
723d86ed7fbStbbdev             mswap(ptr[yyp1], ptr[yyp2]); \
724d86ed7fbStbbdev             yyp1++;                      \
725d86ed7fbStbbdev             yyp2++;                      \
726d86ed7fbStbbdev             yyn--;                       \
727d86ed7fbStbbdev         }                                \
728d86ed7fbStbbdev     }
729d86ed7fbStbbdev 
mmed3(UChar a,UChar b,UChar c)730d86ed7fbStbbdev static __inline__ UChar mmed3(UChar a, UChar b, UChar c) {
731d86ed7fbStbbdev     UChar t;
732d86ed7fbStbbdev     if (a > b) {
733d86ed7fbStbbdev         t = a;
734d86ed7fbStbbdev         a = b;
735d86ed7fbStbbdev         b = t;
736d86ed7fbStbbdev     };
737d86ed7fbStbbdev     if (b > c) {
738d86ed7fbStbbdev         b = c;
739d86ed7fbStbbdev         if (a > b)
740d86ed7fbStbbdev             b = a;
741d86ed7fbStbbdev     }
742d86ed7fbStbbdev     return b;
743d86ed7fbStbbdev }
744d86ed7fbStbbdev 
745d86ed7fbStbbdev #define mmin(a, b) ((a) < (b)) ? (a) : (b)
746d86ed7fbStbbdev 
747d86ed7fbStbbdev #define mpush(lz, hz, dz) \
748d86ed7fbStbbdev     {                     \
749d86ed7fbStbbdev         stackLo[sp] = lz; \
750d86ed7fbStbbdev         stackHi[sp] = hz; \
751d86ed7fbStbbdev         stackD[sp] = dz;  \
752d86ed7fbStbbdev         sp++;             \
753d86ed7fbStbbdev     }
754d86ed7fbStbbdev 
755d86ed7fbStbbdev #define mpop(lz, hz, dz)  \
756d86ed7fbStbbdev     {                     \
757d86ed7fbStbbdev         sp--;             \
758d86ed7fbStbbdev         lz = stackLo[sp]; \
759d86ed7fbStbbdev         hz = stackHi[sp]; \
760d86ed7fbStbbdev         dz = stackD[sp];  \
761d86ed7fbStbbdev     }
762d86ed7fbStbbdev 
763d86ed7fbStbbdev #define mnextsize(az) (nextHi[az] - nextLo[az])
764d86ed7fbStbbdev 
765d86ed7fbStbbdev #define mnextswap(az, bz)        \
766d86ed7fbStbbdev     {                            \
767d86ed7fbStbbdev         Int32 tz;                \
768d86ed7fbStbbdev         tz = nextLo[az];         \
769d86ed7fbStbbdev         nextLo[az] = nextLo[bz]; \
770d86ed7fbStbbdev         nextLo[bz] = tz;         \
771d86ed7fbStbbdev         tz = nextHi[az];         \
772d86ed7fbStbbdev         nextHi[az] = nextHi[bz]; \
773d86ed7fbStbbdev         nextHi[bz] = tz;         \
774d86ed7fbStbbdev         tz = nextD[az];          \
775d86ed7fbStbbdev         nextD[az] = nextD[bz];   \
776d86ed7fbStbbdev         nextD[bz] = tz;          \
777d86ed7fbStbbdev     }
778d86ed7fbStbbdev 
779d86ed7fbStbbdev #define MAIN_QSORT_SMALL_THRESH 20
780d86ed7fbStbbdev #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
781d86ed7fbStbbdev #define MAIN_QSORT_STACK_SIZE   100
782d86ed7fbStbbdev 
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)783d86ed7fbStbbdev static void mainQSort3(UInt32* ptr,
784d86ed7fbStbbdev                        UChar* block,
785d86ed7fbStbbdev                        UInt16* quadrant,
786d86ed7fbStbbdev                        Int32 nblock,
787d86ed7fbStbbdev                        Int32 loSt,
788d86ed7fbStbbdev                        Int32 hiSt,
789d86ed7fbStbbdev                        Int32 dSt,
790d86ed7fbStbbdev                        Int32* budget) {
791d86ed7fbStbbdev     Int32 unLo, unHi, ltLo, gtHi, n, m, med;
792d86ed7fbStbbdev     Int32 sp, lo, hi, d;
793d86ed7fbStbbdev 
794d86ed7fbStbbdev     Int32 stackLo[MAIN_QSORT_STACK_SIZE];
795d86ed7fbStbbdev     Int32 stackHi[MAIN_QSORT_STACK_SIZE];
796d86ed7fbStbbdev     Int32 stackD[MAIN_QSORT_STACK_SIZE];
797d86ed7fbStbbdev 
798d86ed7fbStbbdev     Int32 nextLo[3];
799d86ed7fbStbbdev     Int32 nextHi[3];
800d86ed7fbStbbdev     Int32 nextD[3];
801d86ed7fbStbbdev 
802d86ed7fbStbbdev     sp = 0;
803d86ed7fbStbbdev     mpush(loSt, hiSt, dSt);
804d86ed7fbStbbdev 
805d86ed7fbStbbdev     while (sp > 0) {
806d86ed7fbStbbdev         AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
807d86ed7fbStbbdev 
808d86ed7fbStbbdev         mpop(lo, hi, d);
809d86ed7fbStbbdev         if (hi - lo < MAIN_QSORT_SMALL_THRESH || d > MAIN_QSORT_DEPTH_THRESH) {
810d86ed7fbStbbdev             mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
811d86ed7fbStbbdev             if (*budget < 0)
812d86ed7fbStbbdev                 return;
813d86ed7fbStbbdev             continue;
814d86ed7fbStbbdev         }
815d86ed7fbStbbdev 
816d86ed7fbStbbdev         med = (Int32)mmed3(block[ptr[lo] + d], block[ptr[hi] + d], block[ptr[(lo + hi) >> 1] + d]);
817d86ed7fbStbbdev 
818d86ed7fbStbbdev         unLo = ltLo = lo;
819d86ed7fbStbbdev         unHi = gtHi = hi;
820d86ed7fbStbbdev 
821d86ed7fbStbbdev         while (True) {
822d86ed7fbStbbdev             while (True) {
823d86ed7fbStbbdev                 if (unLo > unHi)
824d86ed7fbStbbdev                     break;
825d86ed7fbStbbdev                 n = ((Int32)block[ptr[unLo] + d]) - med;
826d86ed7fbStbbdev                 if (n == 0) {
827d86ed7fbStbbdev                     mswap(ptr[unLo], ptr[ltLo]);
828d86ed7fbStbbdev                     ltLo++;
829d86ed7fbStbbdev                     unLo++;
830d86ed7fbStbbdev                     continue;
831d86ed7fbStbbdev                 };
832d86ed7fbStbbdev                 if (n > 0)
833d86ed7fbStbbdev                     break;
834d86ed7fbStbbdev                 unLo++;
835d86ed7fbStbbdev             }
836d86ed7fbStbbdev             while (True) {
837d86ed7fbStbbdev                 if (unLo > unHi)
838d86ed7fbStbbdev                     break;
839d86ed7fbStbbdev                 n = ((Int32)block[ptr[unHi] + d]) - med;
840d86ed7fbStbbdev                 if (n == 0) {
841d86ed7fbStbbdev                     mswap(ptr[unHi], ptr[gtHi]);
842d86ed7fbStbbdev                     gtHi--;
843d86ed7fbStbbdev                     unHi--;
844d86ed7fbStbbdev                     continue;
845d86ed7fbStbbdev                 };
846d86ed7fbStbbdev                 if (n < 0)
847d86ed7fbStbbdev                     break;
848d86ed7fbStbbdev                 unHi--;
849d86ed7fbStbbdev             }
850d86ed7fbStbbdev             if (unLo > unHi)
851d86ed7fbStbbdev                 break;
852d86ed7fbStbbdev             mswap(ptr[unLo], ptr[unHi]);
853d86ed7fbStbbdev             unLo++;
854d86ed7fbStbbdev             unHi--;
855d86ed7fbStbbdev         }
856d86ed7fbStbbdev 
857d86ed7fbStbbdev         AssertD(unHi == unLo - 1, "mainQSort3(2)");
858d86ed7fbStbbdev 
859d86ed7fbStbbdev         if (gtHi < ltLo) {
860d86ed7fbStbbdev             mpush(lo, hi, d + 1);
861d86ed7fbStbbdev             continue;
862d86ed7fbStbbdev         }
863d86ed7fbStbbdev 
864d86ed7fbStbbdev         n = mmin(ltLo - lo, unLo - ltLo);
865d86ed7fbStbbdev         mvswap(lo, unLo - n, n);
866d86ed7fbStbbdev         m = mmin(hi - gtHi, gtHi - unHi);
867d86ed7fbStbbdev         mvswap(unLo, hi - m + 1, m);
868d86ed7fbStbbdev 
869d86ed7fbStbbdev         n = lo + unLo - ltLo - 1;
870d86ed7fbStbbdev         m = hi - (gtHi - unHi) + 1;
871d86ed7fbStbbdev 
872d86ed7fbStbbdev         nextLo[0] = lo;
873d86ed7fbStbbdev         nextHi[0] = n;
874d86ed7fbStbbdev         nextD[0] = d;
875d86ed7fbStbbdev         nextLo[1] = m;
876d86ed7fbStbbdev         nextHi[1] = hi;
877d86ed7fbStbbdev         nextD[1] = d;
878d86ed7fbStbbdev         nextLo[2] = n + 1;
879d86ed7fbStbbdev         nextHi[2] = m - 1;
880d86ed7fbStbbdev         nextD[2] = d + 1;
881d86ed7fbStbbdev 
882d86ed7fbStbbdev         if (mnextsize(0) < mnextsize(1))
883d86ed7fbStbbdev             mnextswap(0, 1);
884d86ed7fbStbbdev         if (mnextsize(1) < mnextsize(2))
885d86ed7fbStbbdev             mnextswap(1, 2);
886d86ed7fbStbbdev         if (mnextsize(0) < mnextsize(1))
887d86ed7fbStbbdev             mnextswap(0, 1);
888d86ed7fbStbbdev 
889d86ed7fbStbbdev         AssertD(mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
890d86ed7fbStbbdev         AssertD(mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
891d86ed7fbStbbdev 
892d86ed7fbStbbdev         mpush(nextLo[0], nextHi[0], nextD[0]);
893d86ed7fbStbbdev         mpush(nextLo[1], nextHi[1], nextD[1]);
894d86ed7fbStbbdev         mpush(nextLo[2], nextHi[2], nextD[2]);
895d86ed7fbStbbdev     }
896d86ed7fbStbbdev }
897d86ed7fbStbbdev 
898d86ed7fbStbbdev #undef mswap
899d86ed7fbStbbdev #undef mvswap
900d86ed7fbStbbdev #undef mpush
901d86ed7fbStbbdev #undef mpop
902d86ed7fbStbbdev #undef mmin
903d86ed7fbStbbdev #undef mnextsize
904d86ed7fbStbbdev #undef mnextswap
905d86ed7fbStbbdev #undef MAIN_QSORT_SMALL_THRESH
906d86ed7fbStbbdev #undef MAIN_QSORT_DEPTH_THRESH
907d86ed7fbStbbdev #undef MAIN_QSORT_STACK_SIZE
908d86ed7fbStbbdev 
909d86ed7fbStbbdev /*---------------------------------------------*/
910d86ed7fbStbbdev /* Pre:
911d86ed7fbStbbdev       nblock > N_OVERSHOOT
912d86ed7fbStbbdev       block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
913d86ed7fbStbbdev       ((UChar*)block32) [0 .. nblock-1] holds block
914d86ed7fbStbbdev       ptr exists for [0 .. nblock-1]
915d86ed7fbStbbdev 
916d86ed7fbStbbdev    Post:
917d86ed7fbStbbdev       ((UChar*)block32) [0 .. nblock-1] holds block
918d86ed7fbStbbdev       All other areas of block32 destroyed
919d86ed7fbStbbdev       ftab [0 .. 65536 ] destroyed
920d86ed7fbStbbdev       ptr [0 .. nblock-1] holds sorted order
921d86ed7fbStbbdev       if (*budget < 0), sorting was abandoned
922d86ed7fbStbbdev */
923d86ed7fbStbbdev 
924d86ed7fbStbbdev #define BIGFREQ(b) (ftab[((b) + 1) << 8] - ftab[(b) << 8])
925d86ed7fbStbbdev #define SETMASK    (1 << 21)
926d86ed7fbStbbdev #define CLEARMASK  (~(SETMASK))
927d86ed7fbStbbdev 
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)928d86ed7fbStbbdev static void mainSort(UInt32* ptr,
929d86ed7fbStbbdev                      UChar* block,
930d86ed7fbStbbdev                      UInt16* quadrant,
931d86ed7fbStbbdev                      UInt32* ftab,
932d86ed7fbStbbdev                      Int32 nblock,
933d86ed7fbStbbdev                      Int32 verb,
934d86ed7fbStbbdev                      Int32* budget) {
935d86ed7fbStbbdev     Int32 i, j, k, ss, sb;
936d86ed7fbStbbdev     Int32 runningOrder[256];
937d86ed7fbStbbdev     Bool bigDone[256];
938d86ed7fbStbbdev     Int32 copyStart[256];
939d86ed7fbStbbdev     Int32 copyEnd[256];
940d86ed7fbStbbdev     UChar c1;
941d86ed7fbStbbdev     Int32 numQSorted;
942d86ed7fbStbbdev     UInt16 s;
943d86ed7fbStbbdev     if (verb >= 4)
944d86ed7fbStbbdev         VPrintf0("        main sort initialise ...\n");
945d86ed7fbStbbdev 
946d86ed7fbStbbdev     /*-- set up the 2-byte frequency table --*/
947d86ed7fbStbbdev     for (i = 65536; i >= 0; i--)
948d86ed7fbStbbdev         ftab[i] = 0;
949d86ed7fbStbbdev 
950d86ed7fbStbbdev     j = block[0] << 8;
951d86ed7fbStbbdev     i = nblock - 1;
952d86ed7fbStbbdev     for (; i >= 3; i -= 4) {
953d86ed7fbStbbdev         quadrant[i] = 0;
954d86ed7fbStbbdev         j = (j >> 8) | (((UInt16)block[i]) << 8);
955d86ed7fbStbbdev         ftab[j]++;
956d86ed7fbStbbdev         quadrant[i - 1] = 0;
957d86ed7fbStbbdev         j = (j >> 8) | (((UInt16)block[i - 1]) << 8);
958d86ed7fbStbbdev         ftab[j]++;
959d86ed7fbStbbdev         quadrant[i - 2] = 0;
960d86ed7fbStbbdev         j = (j >> 8) | (((UInt16)block[i - 2]) << 8);
961d86ed7fbStbbdev         ftab[j]++;
962d86ed7fbStbbdev         quadrant[i - 3] = 0;
963d86ed7fbStbbdev         j = (j >> 8) | (((UInt16)block[i - 3]) << 8);
964d86ed7fbStbbdev         ftab[j]++;
965d86ed7fbStbbdev     }
966d86ed7fbStbbdev     for (; i >= 0; i--) {
967d86ed7fbStbbdev         quadrant[i] = 0;
968d86ed7fbStbbdev         j = (j >> 8) | (((UInt16)block[i]) << 8);
969d86ed7fbStbbdev         ftab[j]++;
970d86ed7fbStbbdev     }
971d86ed7fbStbbdev 
972d86ed7fbStbbdev     /*-- (emphasises close relationship of block & quadrant) --*/
973d86ed7fbStbbdev     for (i = 0; i < BZ_N_OVERSHOOT; i++) {
974d86ed7fbStbbdev         block[nblock + i] = block[i];
975d86ed7fbStbbdev         quadrant[nblock + i] = 0;
976d86ed7fbStbbdev     }
977d86ed7fbStbbdev 
978d86ed7fbStbbdev     if (verb >= 4)
979d86ed7fbStbbdev         VPrintf0("        bucket sorting ...\n");
980d86ed7fbStbbdev 
981d86ed7fbStbbdev     /*-- Complete the initial radix sort --*/
982d86ed7fbStbbdev     for (i = 1; i <= 65536; i++)
983d86ed7fbStbbdev         ftab[i] += ftab[i - 1];
984d86ed7fbStbbdev 
985d86ed7fbStbbdev     s = block[0] << 8;
986d86ed7fbStbbdev     i = nblock - 1;
987d86ed7fbStbbdev     for (; i >= 3; i -= 4) {
988d86ed7fbStbbdev         s = (s >> 8) | (block[i] << 8);
989d86ed7fbStbbdev         j = ftab[s] - 1;
990d86ed7fbStbbdev         ftab[s] = j;
991d86ed7fbStbbdev         ptr[j] = i;
992d86ed7fbStbbdev         s = (s >> 8) | (block[i - 1] << 8);
993d86ed7fbStbbdev         j = ftab[s] - 1;
994d86ed7fbStbbdev         ftab[s] = j;
995d86ed7fbStbbdev         ptr[j] = i - 1;
996d86ed7fbStbbdev         s = (s >> 8) | (block[i - 2] << 8);
997d86ed7fbStbbdev         j = ftab[s] - 1;
998d86ed7fbStbbdev         ftab[s] = j;
999d86ed7fbStbbdev         ptr[j] = i - 2;
1000d86ed7fbStbbdev         s = (s >> 8) | (block[i - 3] << 8);
1001d86ed7fbStbbdev         j = ftab[s] - 1;
1002d86ed7fbStbbdev         ftab[s] = j;
1003d86ed7fbStbbdev         ptr[j] = i - 3;
1004d86ed7fbStbbdev     }
1005d86ed7fbStbbdev     for (; i >= 0; i--) {
1006d86ed7fbStbbdev         s = (s >> 8) | (block[i] << 8);
1007d86ed7fbStbbdev         j = ftab[s] - 1;
1008d86ed7fbStbbdev         ftab[s] = j;
1009d86ed7fbStbbdev         ptr[j] = i;
1010d86ed7fbStbbdev     }
1011d86ed7fbStbbdev 
1012d86ed7fbStbbdev     /*--
1013d86ed7fbStbbdev       Now ftab contains the first loc of every small bucket.
1014d86ed7fbStbbdev       Calculate the running order, from smallest to largest
1015d86ed7fbStbbdev       big bucket.
1016d86ed7fbStbbdev    --*/
1017d86ed7fbStbbdev     for (i = 0; i <= 255; i++) {
1018d86ed7fbStbbdev         bigDone[i] = False;
1019d86ed7fbStbbdev         runningOrder[i] = i;
1020d86ed7fbStbbdev     }
1021d86ed7fbStbbdev 
1022d86ed7fbStbbdev     {
1023d86ed7fbStbbdev         Int32 vv;
1024d86ed7fbStbbdev         Int32 h = 1;
1025d86ed7fbStbbdev         do
1026d86ed7fbStbbdev             h = 3 * h + 1;
1027d86ed7fbStbbdev         while (h <= 256);
1028d86ed7fbStbbdev         do {
1029d86ed7fbStbbdev             h = h / 3;
1030d86ed7fbStbbdev             for (i = h; i <= 255; i++) {
1031d86ed7fbStbbdev                 vv = runningOrder[i];
1032d86ed7fbStbbdev                 j = i;
1033d86ed7fbStbbdev                 while (BIGFREQ(runningOrder[j - h]) > BIGFREQ(vv)) {
1034d86ed7fbStbbdev                     runningOrder[j] = runningOrder[j - h];
1035d86ed7fbStbbdev                     j = j - h;
1036d86ed7fbStbbdev                     if (j <= (h - 1))
1037d86ed7fbStbbdev                         goto zero;
1038d86ed7fbStbbdev                 }
1039d86ed7fbStbbdev             zero:
1040d86ed7fbStbbdev                 runningOrder[j] = vv;
1041d86ed7fbStbbdev             }
1042d86ed7fbStbbdev         } while (h != 1);
1043d86ed7fbStbbdev     }
1044d86ed7fbStbbdev 
1045d86ed7fbStbbdev     /*--
1046d86ed7fbStbbdev       The main sorting loop.
1047d86ed7fbStbbdev    --*/
1048d86ed7fbStbbdev 
1049d86ed7fbStbbdev     numQSorted = 0;
1050d86ed7fbStbbdev 
1051d86ed7fbStbbdev     for (i = 0; i <= 255; i++) {
1052d86ed7fbStbbdev         /*--
1053d86ed7fbStbbdev          Process big buckets, starting with the least full.
1054d86ed7fbStbbdev          Basically this is a 3-step process in which we call
1055d86ed7fbStbbdev          mainQSort3 to sort the small buckets [ss, j], but
1056d86ed7fbStbbdev          also make a big effort to avoid the calls if we can.
1057d86ed7fbStbbdev       --*/
1058d86ed7fbStbbdev         ss = runningOrder[i];
1059d86ed7fbStbbdev 
1060d86ed7fbStbbdev         /*--
1061d86ed7fbStbbdev          Step 1:
1062d86ed7fbStbbdev          Complete the big bucket [ss] by quicksorting
1063d86ed7fbStbbdev          any unsorted small buckets [ss, j], for j != ss.
1064d86ed7fbStbbdev          Hopefully previous pointer-scanning phases have already
1065d86ed7fbStbbdev          completed many of the small buckets [ss, j], so
1066d86ed7fbStbbdev          we don't have to sort them at all.
1067d86ed7fbStbbdev       --*/
1068d86ed7fbStbbdev         for (j = 0; j <= 255; j++) {
1069d86ed7fbStbbdev             if (j != ss) {
1070d86ed7fbStbbdev                 sb = (ss << 8) + j;
1071d86ed7fbStbbdev                 if (!(ftab[sb] & SETMASK)) {
1072d86ed7fbStbbdev                     Int32 lo = ftab[sb] & CLEARMASK;
1073d86ed7fbStbbdev                     Int32 hi = (ftab[sb + 1] & CLEARMASK) - 1;
1074d86ed7fbStbbdev                     if (hi > lo) {
1075d86ed7fbStbbdev                         if (verb >= 4)
1076d86ed7fbStbbdev                             VPrintf4("        qsort [0x%x, 0x%x]   "
1077d86ed7fbStbbdev                                      "done %d   this %d\n",
1078d86ed7fbStbbdev                                      ss,
1079d86ed7fbStbbdev                                      j,
1080d86ed7fbStbbdev                                      numQSorted,
1081d86ed7fbStbbdev                                      hi - lo + 1);
1082d86ed7fbStbbdev                         mainQSort3(ptr, block, quadrant, nblock, lo, hi, BZ_N_RADIX, budget);
1083d86ed7fbStbbdev                         numQSorted += (hi - lo + 1);
1084d86ed7fbStbbdev                         if (*budget < 0)
1085d86ed7fbStbbdev                             return;
1086d86ed7fbStbbdev                     }
1087d86ed7fbStbbdev                 }
1088d86ed7fbStbbdev                 ftab[sb] |= SETMASK;
1089d86ed7fbStbbdev             }
1090d86ed7fbStbbdev         }
1091d86ed7fbStbbdev 
1092d86ed7fbStbbdev         AssertH(!bigDone[ss], 1006);
1093d86ed7fbStbbdev 
1094d86ed7fbStbbdev         /*--
1095d86ed7fbStbbdev          Step 2:
1096d86ed7fbStbbdev          Now scan this big bucket [ss] so as to synthesise the
1097d86ed7fbStbbdev          sorted order for small buckets [t, ss] for all t,
1098d86ed7fbStbbdev          including, magically, the bucket [ss,ss] too.
1099d86ed7fbStbbdev          This will avoid doing Real Work in subsequent Step 1's.
1100d86ed7fbStbbdev       --*/
1101d86ed7fbStbbdev         {
1102d86ed7fbStbbdev             for (j = 0; j <= 255; j++) {
1103d86ed7fbStbbdev                 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
1104d86ed7fbStbbdev                 copyEnd[j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
1105d86ed7fbStbbdev             }
1106d86ed7fbStbbdev             for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
1107d86ed7fbStbbdev                 k = ptr[j] - 1;
1108d86ed7fbStbbdev                 if (k < 0)
1109d86ed7fbStbbdev                     k += nblock;
1110d86ed7fbStbbdev                 c1 = block[k];
1111d86ed7fbStbbdev                 if (!bigDone[c1])
1112d86ed7fbStbbdev                     ptr[copyStart[c1]++] = k;
1113d86ed7fbStbbdev             }
1114d86ed7fbStbbdev             for (j = (ftab[(ss + 1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
1115d86ed7fbStbbdev                 k = ptr[j] - 1;
1116d86ed7fbStbbdev                 if (k < 0)
1117d86ed7fbStbbdev                     k += nblock;
1118d86ed7fbStbbdev                 c1 = block[k];
1119d86ed7fbStbbdev                 if (!bigDone[c1])
1120d86ed7fbStbbdev                     ptr[copyEnd[c1]--] = k;
1121d86ed7fbStbbdev             }
1122d86ed7fbStbbdev         }
1123d86ed7fbStbbdev 
1124d86ed7fbStbbdev         AssertH((copyStart[ss] - 1 == copyEnd[ss]) ||
1125d86ed7fbStbbdev                     /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
1126d86ed7fbStbbdev                    Necessity for this case is demonstrated by compressing
1127d86ed7fbStbbdev                    a sequence of approximately 48.5 million of character
1128d86ed7fbStbbdev                    251; 1.0.0/1.0.1 will then die here. */
1129d86ed7fbStbbdev                     (copyStart[ss] == 0 && copyEnd[ss] == nblock - 1),
1130d86ed7fbStbbdev                 1007)
1131d86ed7fbStbbdev 
1132d86ed7fbStbbdev             for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
1133d86ed7fbStbbdev 
1134d86ed7fbStbbdev         /*--
1135d86ed7fbStbbdev          Step 3:
1136d86ed7fbStbbdev          The [ss] big bucket is now done.  Record this fact,
1137d86ed7fbStbbdev          and update the quadrant descriptors.  Remember to
1138d86ed7fbStbbdev          update quadrants in the overshoot area too, if
1139d86ed7fbStbbdev          necessary.  The "if (i < 255)" test merely skips
1140d86ed7fbStbbdev          this updating for the last bucket processed, since
1141d86ed7fbStbbdev          updating for the last bucket is pointless.
1142d86ed7fbStbbdev 
1143d86ed7fbStbbdev          The quadrant array provides a way to incrementally
1144d86ed7fbStbbdev          cache sort orderings, as they appear, so as to
1145d86ed7fbStbbdev          make subsequent comparisons in fullGtU() complete
1146d86ed7fbStbbdev          faster.  For repetitive blocks this makes a big
1147d86ed7fbStbbdev          difference (but not big enough to be able to avoid
1148d86ed7fbStbbdev          the fallback sorting mechanism, exponential radix sort).
1149d86ed7fbStbbdev 
1150d86ed7fbStbbdev          The precise meaning is: at all times:
1151d86ed7fbStbbdev 
1152d86ed7fbStbbdev             for 0 <= i < nblock and 0 <= j <= nblock
1153d86ed7fbStbbdev 
1154d86ed7fbStbbdev             if block[i] != block[j],
1155d86ed7fbStbbdev 
1156d86ed7fbStbbdev                then the relative values of quadrant[i] and
1157d86ed7fbStbbdev                     quadrant[j] are meaningless.
1158d86ed7fbStbbdev 
1159d86ed7fbStbbdev                else {
1160d86ed7fbStbbdev                   if quadrant[i] < quadrant[j]
1161d86ed7fbStbbdev                      then the string starting at i lexicographically
1162d86ed7fbStbbdev                      precedes the string starting at j
1163d86ed7fbStbbdev 
1164d86ed7fbStbbdev                   else if quadrant[i] > quadrant[j]
1165d86ed7fbStbbdev                      then the string starting at j lexicographically
1166d86ed7fbStbbdev                      precedes the string starting at i
1167d86ed7fbStbbdev 
1168d86ed7fbStbbdev                   else
1169d86ed7fbStbbdev                      the relative ordering of the strings starting
1170d86ed7fbStbbdev                      at i and j has not yet been determined.
1171d86ed7fbStbbdev                }
1172d86ed7fbStbbdev       --*/
1173d86ed7fbStbbdev         bigDone[ss] = True;
1174d86ed7fbStbbdev 
1175d86ed7fbStbbdev         if (i < 255) {
1176d86ed7fbStbbdev             Int32 bbStart = ftab[ss << 8] & CLEARMASK;
1177d86ed7fbStbbdev             Int32 bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
1178d86ed7fbStbbdev             Int32 shifts = 0;
1179d86ed7fbStbbdev 
1180d86ed7fbStbbdev             while ((bbSize >> shifts) > 65534)
1181d86ed7fbStbbdev                 shifts++;
1182d86ed7fbStbbdev 
1183d86ed7fbStbbdev             for (j = bbSize - 1; j >= 0; j--) {
1184d86ed7fbStbbdev                 Int32 a2update = ptr[bbStart + j];
1185d86ed7fbStbbdev                 UInt16 qVal = (UInt16)(j >> shifts);
1186d86ed7fbStbbdev                 quadrant[a2update] = qVal;
1187d86ed7fbStbbdev                 if (a2update < BZ_N_OVERSHOOT)
1188d86ed7fbStbbdev                     quadrant[a2update + nblock] = qVal;
1189d86ed7fbStbbdev             }
1190d86ed7fbStbbdev             AssertH(((bbSize - 1) >> shifts) <= 65535, 1002);
1191d86ed7fbStbbdev         }
1192d86ed7fbStbbdev     }
1193d86ed7fbStbbdev 
1194d86ed7fbStbbdev     if (verb >= 4)
1195d86ed7fbStbbdev         VPrintf3("        %d pointers, %d sorted, %d scanned\n",
1196d86ed7fbStbbdev                  nblock,
1197d86ed7fbStbbdev                  numQSorted,
1198d86ed7fbStbbdev                  nblock - numQSorted);
1199d86ed7fbStbbdev }
1200d86ed7fbStbbdev 
1201d86ed7fbStbbdev #undef BIGFREQ
1202d86ed7fbStbbdev #undef SETMASK
1203d86ed7fbStbbdev #undef CLEARMASK
1204d86ed7fbStbbdev 
1205d86ed7fbStbbdev /*---------------------------------------------*/
1206d86ed7fbStbbdev /* Pre:
1207d86ed7fbStbbdev       nblock > 0
1208d86ed7fbStbbdev       arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1209d86ed7fbStbbdev       ((UChar*)arr2)  [0 .. nblock-1] holds block
1210d86ed7fbStbbdev       arr1 exists for [0 .. nblock-1]
1211d86ed7fbStbbdev 
1212d86ed7fbStbbdev    Post:
1213d86ed7fbStbbdev       ((UChar*)arr2) [0 .. nblock-1] holds block
1214d86ed7fbStbbdev       All other areas of block destroyed
1215d86ed7fbStbbdev       ftab [ 0 .. 65536 ] destroyed
1216d86ed7fbStbbdev       arr1 [0 .. nblock-1] holds sorted order
1217d86ed7fbStbbdev */
BZ2_blockSort(EState * s)1218d86ed7fbStbbdev void BZ2_blockSort(EState* s) {
1219d86ed7fbStbbdev     UInt32* ptr = s->ptr;
1220d86ed7fbStbbdev     UChar* block = s->block;
1221d86ed7fbStbbdev     UInt32* ftab = s->ftab;
1222d86ed7fbStbbdev     Int32 nblock = s->nblock;
1223d86ed7fbStbbdev     Int32 verb = s->verbosity;
1224d86ed7fbStbbdev     Int32 wfact = s->workFactor;
1225d86ed7fbStbbdev     UInt16* quadrant;
1226d86ed7fbStbbdev     Int32 budget;
1227d86ed7fbStbbdev     Int32 budgetInit;
1228d86ed7fbStbbdev     Int32 i;
1229d86ed7fbStbbdev 
1230d86ed7fbStbbdev     if (nblock < 10000) {
1231d86ed7fbStbbdev         fallbackSort(s->arr1, s->arr2, ftab, nblock, verb);
1232d86ed7fbStbbdev     }
1233d86ed7fbStbbdev     else {
1234d86ed7fbStbbdev         /* Calculate the location for quadrant, remembering to get
1235d86ed7fbStbbdev          the alignment right.  Assumes that &(block[0]) is at least
1236d86ed7fbStbbdev          2-byte aligned -- this should be ok since block is really
1237d86ed7fbStbbdev          the first section of arr2.
1238d86ed7fbStbbdev       */
1239d86ed7fbStbbdev         i = nblock + BZ_N_OVERSHOOT;
1240d86ed7fbStbbdev         if (i & 1)
1241d86ed7fbStbbdev             i++;
1242d86ed7fbStbbdev         quadrant = (UInt16*)(&(block[i]));
1243d86ed7fbStbbdev 
1244d86ed7fbStbbdev         /* (wfact-1) / 3 puts the default-factor-30
1245d86ed7fbStbbdev          transition point at very roughly the same place as
1246d86ed7fbStbbdev          with v0.1 and v0.9.0.
1247d86ed7fbStbbdev          Not that it particularly matters any more, since the
1248d86ed7fbStbbdev          resulting compressed stream is now the same regardless
1249d86ed7fbStbbdev          of whether or not we use the main sort or fallback sort.
1250d86ed7fbStbbdev       */
1251d86ed7fbStbbdev         if (wfact < 1)
1252d86ed7fbStbbdev             wfact = 1;
1253d86ed7fbStbbdev         if (wfact > 100)
1254d86ed7fbStbbdev             wfact = 100;
1255d86ed7fbStbbdev         budgetInit = nblock * ((wfact - 1) / 3);
1256d86ed7fbStbbdev         budget = budgetInit;
1257d86ed7fbStbbdev 
1258d86ed7fbStbbdev         mainSort(ptr, block, quadrant, ftab, nblock, verb, &budget);
1259d86ed7fbStbbdev         if (verb >= 3)
1260d86ed7fbStbbdev             VPrintf3("      %d work, %d block, ratio %5.2f\n",
1261d86ed7fbStbbdev                      budgetInit - budget,
1262d86ed7fbStbbdev                      nblock,
1263d86ed7fbStbbdev                      (float)(budgetInit - budget) / (float)(nblock == 0 ? 1 : nblock));
1264d86ed7fbStbbdev         if (budget < 0) {
1265d86ed7fbStbbdev             if (verb >= 2)
1266d86ed7fbStbbdev                 VPrintf0("    too repetitive; using fallback"
1267d86ed7fbStbbdev                          " sorting algorithm\n");
1268d86ed7fbStbbdev             fallbackSort(s->arr1, s->arr2, ftab, nblock, verb);
1269d86ed7fbStbbdev         }
1270d86ed7fbStbbdev     }
1271d86ed7fbStbbdev 
1272d86ed7fbStbbdev     s->origPtr = -1;
1273d86ed7fbStbbdev     for (i = 0; i < s->nblock; i++)
1274d86ed7fbStbbdev         if (ptr[i] == 0) {
1275d86ed7fbStbbdev             s->origPtr = i;
1276d86ed7fbStbbdev             break;
1277d86ed7fbStbbdev         };
1278d86ed7fbStbbdev 
1279d86ed7fbStbbdev     AssertH(s->origPtr != -1, 1003);
1280d86ed7fbStbbdev }
1281d86ed7fbStbbdev 
1282d86ed7fbStbbdev /*-------------------------------------------------------------*/
1283d86ed7fbStbbdev /*--- end                                       blocksort.c ---*/
1284d86ed7fbStbbdev /*-------------------------------------------------------------*/
1285