1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev /*-------------------------------------------------------------*/
18d86ed7fbStbbdev /*--- Block sorting machinery ---*/
19d86ed7fbStbbdev /*--- blocksort.cpp ---*/
20d86ed7fbStbbdev /*-------------------------------------------------------------*/
21d86ed7fbStbbdev
22d86ed7fbStbbdev /* ------------------------------------------------------------------
23d86ed7fbStbbdev The original source for this example:
24d86ed7fbStbbdev This file is part of bzip2/libbzip2, a program and library for
25d86ed7fbStbbdev lossless, block-sorting data compression.
26d86ed7fbStbbdev
27d86ed7fbStbbdev bzip2/libbzip2 version 1.0.6 of 6 September 2010
28d86ed7fbStbbdev Copyright (C) 1996-2010 Julian Seward <[email protected]>
29d86ed7fbStbbdev
30d86ed7fbStbbdev This program, "bzip2", the associated library "libbzip2", and all
31d86ed7fbStbbdev documentation, are copyright (C) 1996-2010 Julian R Seward. All
32d86ed7fbStbbdev rights reserved.
33d86ed7fbStbbdev
34d86ed7fbStbbdev Redistribution and use in source and binary forms, with or without
35d86ed7fbStbbdev modification, are permitted provided that the following conditions
36d86ed7fbStbbdev are met:
37d86ed7fbStbbdev
38d86ed7fbStbbdev 1. Redistributions of source code must retain the above copyright
39d86ed7fbStbbdev notice, this list of conditions and the following disclaimer.
40d86ed7fbStbbdev
41d86ed7fbStbbdev 2. The origin of this software must not be misrepresented; you must
42d86ed7fbStbbdev not claim that you wrote the original software. If you use this
43d86ed7fbStbbdev software in a product, an acknowledgment in the product
44d86ed7fbStbbdev documentation would be appreciated but is not required.
45d86ed7fbStbbdev
46d86ed7fbStbbdev 3. Altered source versions must be plainly marked as such, and must
47d86ed7fbStbbdev not be misrepresented as being the original software.
48d86ed7fbStbbdev
49d86ed7fbStbbdev 4. The name of the author may not be used to endorse or promote
50d86ed7fbStbbdev products derived from this software without specific prior written
51d86ed7fbStbbdev permission.
52d86ed7fbStbbdev
53d86ed7fbStbbdev THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
54d86ed7fbStbbdev OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55d86ed7fbStbbdev WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56d86ed7fbStbbdev ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
57d86ed7fbStbbdev DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58d86ed7fbStbbdev DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
59d86ed7fbStbbdev GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60d86ed7fbStbbdev INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61d86ed7fbStbbdev WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62d86ed7fbStbbdev NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63d86ed7fbStbbdev SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64d86ed7fbStbbdev
65d86ed7fbStbbdev Julian Seward, [email protected]
66d86ed7fbStbbdev bzip2/libbzip2 version 1.0.6 of 6 September 2010
67d86ed7fbStbbdev ------------------------------------------------------------------ */
68d86ed7fbStbbdev
69d86ed7fbStbbdev #include "bzlib_private.hpp"
70d86ed7fbStbbdev
71d86ed7fbStbbdev /*---------------------------------------------*/
72d86ed7fbStbbdev /*--- Fallback O(N log(N)^2) sorting ---*/
73d86ed7fbStbbdev /*--- algorithm, for repetitive blocks ---*/
74d86ed7fbStbbdev /*---------------------------------------------*/
75d86ed7fbStbbdev
76d86ed7fbStbbdev /*---------------------------------------------*/
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)77d86ed7fbStbbdev static __inline__ void fallbackSimpleSort(UInt32* fmap, UInt32* eclass, Int32 lo, Int32 hi) {
78d86ed7fbStbbdev Int32 i, j, tmp;
79d86ed7fbStbbdev UInt32 ec_tmp;
80d86ed7fbStbbdev
81d86ed7fbStbbdev if (lo == hi)
82d86ed7fbStbbdev return;
83d86ed7fbStbbdev
84d86ed7fbStbbdev if (hi - lo > 3) {
85d86ed7fbStbbdev for (i = hi - 4; i >= lo; i--) {
86d86ed7fbStbbdev tmp = fmap[i];
87d86ed7fbStbbdev ec_tmp = eclass[tmp];
88d86ed7fbStbbdev for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
89d86ed7fbStbbdev fmap[j - 4] = fmap[j];
90d86ed7fbStbbdev fmap[j - 4] = tmp;
91d86ed7fbStbbdev }
92d86ed7fbStbbdev }
93d86ed7fbStbbdev
94d86ed7fbStbbdev for (i = hi - 1; i >= lo; i--) {
95d86ed7fbStbbdev tmp = fmap[i];
96d86ed7fbStbbdev ec_tmp = eclass[tmp];
97d86ed7fbStbbdev for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
98d86ed7fbStbbdev fmap[j - 1] = fmap[j];
99d86ed7fbStbbdev fmap[j - 1] = tmp;
100d86ed7fbStbbdev }
101d86ed7fbStbbdev }
102d86ed7fbStbbdev
103d86ed7fbStbbdev /*---------------------------------------------*/
104d86ed7fbStbbdev #define fswap(zz1, zz2) \
105d86ed7fbStbbdev { \
106d86ed7fbStbbdev Int32 zztmp = zz1; \
107d86ed7fbStbbdev zz1 = zz2; \
108d86ed7fbStbbdev zz2 = zztmp; \
109d86ed7fbStbbdev }
110d86ed7fbStbbdev
111d86ed7fbStbbdev #define fvswap(zzp1, zzp2, zzn) \
112d86ed7fbStbbdev { \
113d86ed7fbStbbdev Int32 yyp1 = (zzp1); \
114d86ed7fbStbbdev Int32 yyp2 = (zzp2); \
115d86ed7fbStbbdev Int32 yyn = (zzn); \
116d86ed7fbStbbdev while (yyn > 0) { \
117d86ed7fbStbbdev fswap(fmap[yyp1], fmap[yyp2]); \
118d86ed7fbStbbdev yyp1++; \
119d86ed7fbStbbdev yyp2++; \
120d86ed7fbStbbdev yyn--; \
121d86ed7fbStbbdev } \
122d86ed7fbStbbdev }
123d86ed7fbStbbdev
124d86ed7fbStbbdev #define fmin(a, b) ((a) < (b)) ? (a) : (b)
125d86ed7fbStbbdev
126d86ed7fbStbbdev #define fpush(lz, hz) \
127d86ed7fbStbbdev { \
128d86ed7fbStbbdev stackLo[sp] = lz; \
129d86ed7fbStbbdev stackHi[sp] = hz; \
130d86ed7fbStbbdev sp++; \
131d86ed7fbStbbdev }
132d86ed7fbStbbdev
133d86ed7fbStbbdev #define fpop(lz, hz) \
134d86ed7fbStbbdev { \
135d86ed7fbStbbdev sp--; \
136d86ed7fbStbbdev lz = stackLo[sp]; \
137d86ed7fbStbbdev hz = stackHi[sp]; \
138d86ed7fbStbbdev }
139d86ed7fbStbbdev
140d86ed7fbStbbdev #define FALLBACK_QSORT_SMALL_THRESH 10
141d86ed7fbStbbdev #define FALLBACK_QSORT_STACK_SIZE 100
142d86ed7fbStbbdev
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)143d86ed7fbStbbdev static void fallbackQSort3(UInt32* fmap, UInt32* eclass, Int32 loSt, Int32 hiSt) {
144d86ed7fbStbbdev Int32 unLo, unHi, ltLo, gtHi, n, m;
145d86ed7fbStbbdev Int32 sp, lo, hi;
146d86ed7fbStbbdev UInt32 med, r, r3;
147d86ed7fbStbbdev Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
148d86ed7fbStbbdev Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
149d86ed7fbStbbdev
150d86ed7fbStbbdev r = 0;
151d86ed7fbStbbdev
152d86ed7fbStbbdev sp = 0;
153d86ed7fbStbbdev fpush(loSt, hiSt);
154d86ed7fbStbbdev
155d86ed7fbStbbdev while (sp > 0) {
156d86ed7fbStbbdev AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
157d86ed7fbStbbdev
158d86ed7fbStbbdev fpop(lo, hi);
159d86ed7fbStbbdev if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
160d86ed7fbStbbdev fallbackSimpleSort(fmap, eclass, lo, hi);
161d86ed7fbStbbdev continue;
162d86ed7fbStbbdev }
163d86ed7fbStbbdev
164d86ed7fbStbbdev /* Random partitioning. Median of 3 sometimes fails to
165d86ed7fbStbbdev avoid bad cases. Median of 9 seems to help but
166d86ed7fbStbbdev looks rather expensive. This too seems to work but
167d86ed7fbStbbdev is cheaper. Guidance for the magic constants
168d86ed7fbStbbdev 7621 and 32768 is taken from Sedgewick's algorithms
169d86ed7fbStbbdev book, chapter 35.
170d86ed7fbStbbdev */
171d86ed7fbStbbdev r = ((r * 7621) + 1) % 32768;
172d86ed7fbStbbdev r3 = r % 3;
173d86ed7fbStbbdev if (r3 == 0)
174d86ed7fbStbbdev med = eclass[fmap[lo]];
175d86ed7fbStbbdev else if (r3 == 1)
176d86ed7fbStbbdev med = eclass[fmap[(lo + hi) >> 1]];
177d86ed7fbStbbdev else
178d86ed7fbStbbdev med = eclass[fmap[hi]];
179d86ed7fbStbbdev
180d86ed7fbStbbdev unLo = ltLo = lo;
181d86ed7fbStbbdev unHi = gtHi = hi;
182d86ed7fbStbbdev
183d86ed7fbStbbdev while (1) {
184d86ed7fbStbbdev while (1) {
185d86ed7fbStbbdev if (unLo > unHi)
186d86ed7fbStbbdev break;
187d86ed7fbStbbdev n = (Int32)eclass[fmap[unLo]] - (Int32)med;
188d86ed7fbStbbdev if (n == 0) {
189d86ed7fbStbbdev fswap(fmap[unLo], fmap[ltLo]);
190d86ed7fbStbbdev ltLo++;
191d86ed7fbStbbdev unLo++;
192d86ed7fbStbbdev continue;
193d86ed7fbStbbdev };
194d86ed7fbStbbdev if (n > 0)
195d86ed7fbStbbdev break;
196d86ed7fbStbbdev unLo++;
197d86ed7fbStbbdev }
198d86ed7fbStbbdev while (1) {
199d86ed7fbStbbdev if (unLo > unHi)
200d86ed7fbStbbdev break;
201d86ed7fbStbbdev n = (Int32)eclass[fmap[unHi]] - (Int32)med;
202d86ed7fbStbbdev if (n == 0) {
203d86ed7fbStbbdev fswap(fmap[unHi], fmap[gtHi]);
204d86ed7fbStbbdev gtHi--;
205d86ed7fbStbbdev unHi--;
206d86ed7fbStbbdev continue;
207d86ed7fbStbbdev };
208d86ed7fbStbbdev if (n < 0)
209d86ed7fbStbbdev break;
210d86ed7fbStbbdev unHi--;
211d86ed7fbStbbdev }
212d86ed7fbStbbdev if (unLo > unHi)
213d86ed7fbStbbdev break;
214d86ed7fbStbbdev fswap(fmap[unLo], fmap[unHi]);
215d86ed7fbStbbdev unLo++;
216d86ed7fbStbbdev unHi--;
217d86ed7fbStbbdev }
218d86ed7fbStbbdev
219d86ed7fbStbbdev AssertD(unHi == unLo - 1, "fallbackQSort3(2)");
220d86ed7fbStbbdev
221d86ed7fbStbbdev if (gtHi < ltLo)
222d86ed7fbStbbdev continue;
223d86ed7fbStbbdev
224d86ed7fbStbbdev n = fmin(ltLo - lo, unLo - ltLo);
225d86ed7fbStbbdev fvswap(lo, unLo - n, n);
226d86ed7fbStbbdev m = fmin(hi - gtHi, gtHi - unHi);
227d86ed7fbStbbdev fvswap(unLo, hi - m + 1, m);
228d86ed7fbStbbdev
229d86ed7fbStbbdev n = lo + unLo - ltLo - 1;
230d86ed7fbStbbdev m = hi - (gtHi - unHi) + 1;
231d86ed7fbStbbdev
232d86ed7fbStbbdev if (n - lo > hi - m) {
233d86ed7fbStbbdev fpush(lo, n);
234d86ed7fbStbbdev fpush(m, hi);
235d86ed7fbStbbdev }
236d86ed7fbStbbdev else {
237d86ed7fbStbbdev fpush(m, hi);
238d86ed7fbStbbdev fpush(lo, n);
239d86ed7fbStbbdev }
240d86ed7fbStbbdev }
241d86ed7fbStbbdev }
242d86ed7fbStbbdev
243d86ed7fbStbbdev #undef fmin
244d86ed7fbStbbdev #undef fpush
245d86ed7fbStbbdev #undef fpop
246d86ed7fbStbbdev #undef fswap
247d86ed7fbStbbdev #undef fvswap
248d86ed7fbStbbdev #undef FALLBACK_QSORT_SMALL_THRESH
249d86ed7fbStbbdev #undef FALLBACK_QSORT_STACK_SIZE
250d86ed7fbStbbdev
251d86ed7fbStbbdev /*---------------------------------------------*/
252d86ed7fbStbbdev /* Pre:
253d86ed7fbStbbdev nblock > 0
254d86ed7fbStbbdev eclass exists for [0 .. nblock-1]
255d86ed7fbStbbdev ((UChar*)eclass) [0 .. nblock-1] holds block
256d86ed7fbStbbdev ptr exists for [0 .. nblock-1]
257d86ed7fbStbbdev
258d86ed7fbStbbdev Post:
259d86ed7fbStbbdev ((UChar*)eclass) [0 .. nblock-1] holds block
260d86ed7fbStbbdev All other areas of eclass destroyed
261d86ed7fbStbbdev fmap [0 .. nblock-1] holds sorted order
262d86ed7fbStbbdev bhtab [ 0 .. 2+(nblock/32) ] destroyed
263d86ed7fbStbbdev */
264d86ed7fbStbbdev
265d86ed7fbStbbdev #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz)&31))
266d86ed7fbStbbdev #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz)&31))
267d86ed7fbStbbdev #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz)&31)))
268d86ed7fbStbbdev #define WORD_BH(zz) bhtab[(zz) >> 5]
269d86ed7fbStbbdev #define UNALIGNED_BH(zz) ((zz)&0x01f)
270d86ed7fbStbbdev
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)271d86ed7fbStbbdev static void fallbackSort(UInt32* fmap, UInt32* eclass, UInt32* bhtab, Int32 nblock, Int32 verb) {
272d86ed7fbStbbdev Int32 ftab[257];
273d86ed7fbStbbdev Int32 ftabCopy[256];
274d86ed7fbStbbdev Int32 H, i, j, k, l, r, cc, cc1;
275d86ed7fbStbbdev Int32 nNotDone;
276d86ed7fbStbbdev Int32 nBhtab;
277d86ed7fbStbbdev UChar* eclass8 = (UChar*)eclass;
278d86ed7fbStbbdev
279d86ed7fbStbbdev /*--
280d86ed7fbStbbdev Initial 1-char radix sort to generate
281d86ed7fbStbbdev initial fmap and initial BH bits.
282d86ed7fbStbbdev --*/
283d86ed7fbStbbdev if (verb >= 4)
284d86ed7fbStbbdev VPrintf0(" bucket sorting ...\n");
285d86ed7fbStbbdev for (i = 0; i < 257; i++)
286d86ed7fbStbbdev ftab[i] = 0;
287d86ed7fbStbbdev for (i = 0; i < nblock; i++)
288d86ed7fbStbbdev ftab[eclass8[i]]++;
289d86ed7fbStbbdev for (i = 0; i < 256; i++)
290d86ed7fbStbbdev ftabCopy[i] = ftab[i];
291d86ed7fbStbbdev for (i = 1; i < 257; i++)
292d86ed7fbStbbdev ftab[i] += ftab[i - 1];
293d86ed7fbStbbdev
294d86ed7fbStbbdev for (i = 0; i < nblock; i++) {
295d86ed7fbStbbdev j = eclass8[i];
296d86ed7fbStbbdev k = ftab[j] - 1;
297d86ed7fbStbbdev ftab[j] = k;
298d86ed7fbStbbdev fmap[k] = i;
299d86ed7fbStbbdev }
300d86ed7fbStbbdev
301d86ed7fbStbbdev nBhtab = 2 + (nblock / 32);
302d86ed7fbStbbdev for (i = 0; i < nBhtab; i++)
303d86ed7fbStbbdev bhtab[i] = 0;
304d86ed7fbStbbdev for (i = 0; i < 256; i++)
305d86ed7fbStbbdev SET_BH(ftab[i]);
306d86ed7fbStbbdev
307d86ed7fbStbbdev /*--
308d86ed7fbStbbdev Inductively refine the buckets. Kind-of an
309d86ed7fbStbbdev "exponential radix sort" (!), inspired by the
310d86ed7fbStbbdev Manber-Myers suffix array construction algorithm.
311d86ed7fbStbbdev --*/
312d86ed7fbStbbdev
313d86ed7fbStbbdev /*-- set sentinel bits for block-end detection --*/
314d86ed7fbStbbdev for (i = 0; i < 32; i++) {
315d86ed7fbStbbdev SET_BH(nblock + 2 * i);
316d86ed7fbStbbdev CLEAR_BH(nblock + 2 * i + 1);
317d86ed7fbStbbdev }
318d86ed7fbStbbdev
319d86ed7fbStbbdev /*-- the log(N) loop --*/
320d86ed7fbStbbdev H = 1;
321d86ed7fbStbbdev while (1) {
322d86ed7fbStbbdev if (verb >= 4)
323d86ed7fbStbbdev VPrintf1(" depth %6d has ", H);
324d86ed7fbStbbdev
325d86ed7fbStbbdev j = 0;
326d86ed7fbStbbdev for (i = 0; i < nblock; i++) {
327d86ed7fbStbbdev if (ISSET_BH(i))
328d86ed7fbStbbdev j = i;
329d86ed7fbStbbdev k = fmap[i] - H;
330d86ed7fbStbbdev if (k < 0)
331d86ed7fbStbbdev k += nblock;
332d86ed7fbStbbdev eclass[k] = j;
333d86ed7fbStbbdev }
334d86ed7fbStbbdev
335d86ed7fbStbbdev nNotDone = 0;
336d86ed7fbStbbdev r = -1;
337d86ed7fbStbbdev while (1) {
338d86ed7fbStbbdev /*-- find the next non-singleton bucket --*/
339d86ed7fbStbbdev k = r + 1;
340d86ed7fbStbbdev while (ISSET_BH(k) && UNALIGNED_BH(k))
341d86ed7fbStbbdev k++;
342d86ed7fbStbbdev if (ISSET_BH(k)) {
343d86ed7fbStbbdev while (WORD_BH(k) == 0xffffffff)
344d86ed7fbStbbdev k += 32;
345d86ed7fbStbbdev while (ISSET_BH(k))
346d86ed7fbStbbdev k++;
347d86ed7fbStbbdev }
348d86ed7fbStbbdev l = k - 1;
349d86ed7fbStbbdev if (l >= nblock)
350d86ed7fbStbbdev break;
351d86ed7fbStbbdev while (!ISSET_BH(k) && UNALIGNED_BH(k))
352d86ed7fbStbbdev k++;
353d86ed7fbStbbdev if (!ISSET_BH(k)) {
354d86ed7fbStbbdev while (WORD_BH(k) == 0x00000000)
355d86ed7fbStbbdev k += 32;
356d86ed7fbStbbdev while (!ISSET_BH(k))
357d86ed7fbStbbdev k++;
358d86ed7fbStbbdev }
359d86ed7fbStbbdev r = k - 1;
360d86ed7fbStbbdev if (r >= nblock)
361d86ed7fbStbbdev break;
362d86ed7fbStbbdev
363d86ed7fbStbbdev /*-- now [l, r] bracket current bucket --*/
364d86ed7fbStbbdev if (r > l) {
365d86ed7fbStbbdev nNotDone += (r - l + 1);
366d86ed7fbStbbdev fallbackQSort3(fmap, eclass, l, r);
367d86ed7fbStbbdev
368d86ed7fbStbbdev /*-- scan bucket and generate header bits-- */
369d86ed7fbStbbdev cc = -1;
370d86ed7fbStbbdev for (i = l; i <= r; i++) {
371d86ed7fbStbbdev cc1 = eclass[fmap[i]];
372d86ed7fbStbbdev if (cc != cc1) {
373d86ed7fbStbbdev SET_BH(i);
374d86ed7fbStbbdev cc = cc1;
375d86ed7fbStbbdev };
376d86ed7fbStbbdev }
377d86ed7fbStbbdev }
378d86ed7fbStbbdev }
379d86ed7fbStbbdev
380d86ed7fbStbbdev if (verb >= 4)
381d86ed7fbStbbdev VPrintf1("%6d unresolved strings\n", nNotDone);
382d86ed7fbStbbdev
383d86ed7fbStbbdev H *= 2;
384d86ed7fbStbbdev if (H > nblock || nNotDone == 0)
385d86ed7fbStbbdev break;
386d86ed7fbStbbdev }
387d86ed7fbStbbdev
388d86ed7fbStbbdev /*--
389d86ed7fbStbbdev Reconstruct the original block in
390d86ed7fbStbbdev eclass8 [0 .. nblock-1], since the
391d86ed7fbStbbdev previous phase destroyed it.
392d86ed7fbStbbdev --*/
393d86ed7fbStbbdev if (verb >= 4)
394d86ed7fbStbbdev VPrintf0(" reconstructing block ...\n");
395d86ed7fbStbbdev j = 0;
396d86ed7fbStbbdev for (i = 0; i < nblock; i++) {
397d86ed7fbStbbdev while (ftabCopy[j] == 0)
398d86ed7fbStbbdev j++;
399d86ed7fbStbbdev ftabCopy[j]--;
400d86ed7fbStbbdev eclass8[fmap[i]] = (UChar)j;
401d86ed7fbStbbdev }
402d86ed7fbStbbdev AssertH(j < 256, 1005);
403d86ed7fbStbbdev }
404d86ed7fbStbbdev
405d86ed7fbStbbdev #undef SET_BH
406d86ed7fbStbbdev #undef CLEAR_BH
407d86ed7fbStbbdev #undef ISSET_BH
408d86ed7fbStbbdev #undef WORD_BH
409d86ed7fbStbbdev #undef UNALIGNED_BH
410d86ed7fbStbbdev
411d86ed7fbStbbdev /*---------------------------------------------*/
412d86ed7fbStbbdev /*--- The main, O(N^2 log(N)) sorting ---*/
413d86ed7fbStbbdev /*--- algorithm. Faster for "normal" ---*/
414d86ed7fbStbbdev /*--- non-repetitive blocks. ---*/
415d86ed7fbStbbdev /*---------------------------------------------*/
416d86ed7fbStbbdev
417d86ed7fbStbbdev /*---------------------------------------------*/
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)418d86ed7fbStbbdev static __inline__ Bool mainGtU(UInt32 i1,
419d86ed7fbStbbdev UInt32 i2,
420d86ed7fbStbbdev UChar* block,
421d86ed7fbStbbdev UInt16* quadrant,
422d86ed7fbStbbdev UInt32 nblock,
423d86ed7fbStbbdev Int32* budget) {
424d86ed7fbStbbdev Int32 k;
425d86ed7fbStbbdev UChar c1, c2;
426d86ed7fbStbbdev UInt16 s1, s2;
427d86ed7fbStbbdev
428d86ed7fbStbbdev AssertD(i1 != i2, "mainGtU");
429d86ed7fbStbbdev /* 1 */
430d86ed7fbStbbdev c1 = block[i1];
431d86ed7fbStbbdev c2 = block[i2];
432d86ed7fbStbbdev if (c1 != c2)
433d86ed7fbStbbdev return (c1 > c2);
434d86ed7fbStbbdev i1++;
435d86ed7fbStbbdev i2++;
436d86ed7fbStbbdev /* 2 */
437d86ed7fbStbbdev c1 = block[i1];
438d86ed7fbStbbdev c2 = block[i2];
439d86ed7fbStbbdev if (c1 != c2)
440d86ed7fbStbbdev return (c1 > c2);
441d86ed7fbStbbdev i1++;
442d86ed7fbStbbdev i2++;
443d86ed7fbStbbdev /* 3 */
444d86ed7fbStbbdev c1 = block[i1];
445d86ed7fbStbbdev c2 = block[i2];
446d86ed7fbStbbdev if (c1 != c2)
447d86ed7fbStbbdev return (c1 > c2);
448d86ed7fbStbbdev i1++;
449d86ed7fbStbbdev i2++;
450d86ed7fbStbbdev /* 4 */
451d86ed7fbStbbdev c1 = block[i1];
452d86ed7fbStbbdev c2 = block[i2];
453d86ed7fbStbbdev if (c1 != c2)
454d86ed7fbStbbdev return (c1 > c2);
455d86ed7fbStbbdev i1++;
456d86ed7fbStbbdev i2++;
457d86ed7fbStbbdev /* 5 */
458d86ed7fbStbbdev c1 = block[i1];
459d86ed7fbStbbdev c2 = block[i2];
460d86ed7fbStbbdev if (c1 != c2)
461d86ed7fbStbbdev return (c1 > c2);
462d86ed7fbStbbdev i1++;
463d86ed7fbStbbdev i2++;
464d86ed7fbStbbdev /* 6 */
465d86ed7fbStbbdev c1 = block[i1];
466d86ed7fbStbbdev c2 = block[i2];
467d86ed7fbStbbdev if (c1 != c2)
468d86ed7fbStbbdev return (c1 > c2);
469d86ed7fbStbbdev i1++;
470d86ed7fbStbbdev i2++;
471d86ed7fbStbbdev /* 7 */
472d86ed7fbStbbdev c1 = block[i1];
473d86ed7fbStbbdev c2 = block[i2];
474d86ed7fbStbbdev if (c1 != c2)
475d86ed7fbStbbdev return (c1 > c2);
476d86ed7fbStbbdev i1++;
477d86ed7fbStbbdev i2++;
478d86ed7fbStbbdev /* 8 */
479d86ed7fbStbbdev c1 = block[i1];
480d86ed7fbStbbdev c2 = block[i2];
481d86ed7fbStbbdev if (c1 != c2)
482d86ed7fbStbbdev return (c1 > c2);
483d86ed7fbStbbdev i1++;
484d86ed7fbStbbdev i2++;
485d86ed7fbStbbdev /* 9 */
486d86ed7fbStbbdev c1 = block[i1];
487d86ed7fbStbbdev c2 = block[i2];
488d86ed7fbStbbdev if (c1 != c2)
489d86ed7fbStbbdev return (c1 > c2);
490d86ed7fbStbbdev i1++;
491d86ed7fbStbbdev i2++;
492d86ed7fbStbbdev /* 10 */
493d86ed7fbStbbdev c1 = block[i1];
494d86ed7fbStbbdev c2 = block[i2];
495d86ed7fbStbbdev if (c1 != c2)
496d86ed7fbStbbdev return (c1 > c2);
497d86ed7fbStbbdev i1++;
498d86ed7fbStbbdev i2++;
499d86ed7fbStbbdev /* 11 */
500d86ed7fbStbbdev c1 = block[i1];
501d86ed7fbStbbdev c2 = block[i2];
502d86ed7fbStbbdev if (c1 != c2)
503d86ed7fbStbbdev return (c1 > c2);
504d86ed7fbStbbdev i1++;
505d86ed7fbStbbdev i2++;
506d86ed7fbStbbdev /* 12 */
507d86ed7fbStbbdev c1 = block[i1];
508d86ed7fbStbbdev c2 = block[i2];
509d86ed7fbStbbdev if (c1 != c2)
510d86ed7fbStbbdev return (c1 > c2);
511d86ed7fbStbbdev i1++;
512d86ed7fbStbbdev i2++;
513d86ed7fbStbbdev
514d86ed7fbStbbdev k = nblock + 8;
515d86ed7fbStbbdev
516d86ed7fbStbbdev do {
517d86ed7fbStbbdev /* 1 */
518d86ed7fbStbbdev c1 = block[i1];
519d86ed7fbStbbdev c2 = block[i2];
520d86ed7fbStbbdev if (c1 != c2)
521d86ed7fbStbbdev return (c1 > c2);
522d86ed7fbStbbdev s1 = quadrant[i1];
523d86ed7fbStbbdev s2 = quadrant[i2];
524d86ed7fbStbbdev if (s1 != s2)
525d86ed7fbStbbdev return (s1 > s2);
526d86ed7fbStbbdev i1++;
527d86ed7fbStbbdev i2++;
528d86ed7fbStbbdev /* 2 */
529d86ed7fbStbbdev c1 = block[i1];
530d86ed7fbStbbdev c2 = block[i2];
531d86ed7fbStbbdev if (c1 != c2)
532d86ed7fbStbbdev return (c1 > c2);
533d86ed7fbStbbdev s1 = quadrant[i1];
534d86ed7fbStbbdev s2 = quadrant[i2];
535d86ed7fbStbbdev if (s1 != s2)
536d86ed7fbStbbdev return (s1 > s2);
537d86ed7fbStbbdev i1++;
538d86ed7fbStbbdev i2++;
539d86ed7fbStbbdev /* 3 */
540d86ed7fbStbbdev c1 = block[i1];
541d86ed7fbStbbdev c2 = block[i2];
542d86ed7fbStbbdev if (c1 != c2)
543d86ed7fbStbbdev return (c1 > c2);
544d86ed7fbStbbdev s1 = quadrant[i1];
545d86ed7fbStbbdev s2 = quadrant[i2];
546d86ed7fbStbbdev if (s1 != s2)
547d86ed7fbStbbdev return (s1 > s2);
548d86ed7fbStbbdev i1++;
549d86ed7fbStbbdev i2++;
550d86ed7fbStbbdev /* 4 */
551d86ed7fbStbbdev c1 = block[i1];
552d86ed7fbStbbdev c2 = block[i2];
553d86ed7fbStbbdev if (c1 != c2)
554d86ed7fbStbbdev return (c1 > c2);
555d86ed7fbStbbdev s1 = quadrant[i1];
556d86ed7fbStbbdev s2 = quadrant[i2];
557d86ed7fbStbbdev if (s1 != s2)
558d86ed7fbStbbdev return (s1 > s2);
559d86ed7fbStbbdev i1++;
560d86ed7fbStbbdev i2++;
561d86ed7fbStbbdev /* 5 */
562d86ed7fbStbbdev c1 = block[i1];
563d86ed7fbStbbdev c2 = block[i2];
564d86ed7fbStbbdev if (c1 != c2)
565d86ed7fbStbbdev return (c1 > c2);
566d86ed7fbStbbdev s1 = quadrant[i1];
567d86ed7fbStbbdev s2 = quadrant[i2];
568d86ed7fbStbbdev if (s1 != s2)
569d86ed7fbStbbdev return (s1 > s2);
570d86ed7fbStbbdev i1++;
571d86ed7fbStbbdev i2++;
572d86ed7fbStbbdev /* 6 */
573d86ed7fbStbbdev c1 = block[i1];
574d86ed7fbStbbdev c2 = block[i2];
575d86ed7fbStbbdev if (c1 != c2)
576d86ed7fbStbbdev return (c1 > c2);
577d86ed7fbStbbdev s1 = quadrant[i1];
578d86ed7fbStbbdev s2 = quadrant[i2];
579d86ed7fbStbbdev if (s1 != s2)
580d86ed7fbStbbdev return (s1 > s2);
581d86ed7fbStbbdev i1++;
582d86ed7fbStbbdev i2++;
583d86ed7fbStbbdev /* 7 */
584d86ed7fbStbbdev c1 = block[i1];
585d86ed7fbStbbdev c2 = block[i2];
586d86ed7fbStbbdev if (c1 != c2)
587d86ed7fbStbbdev return (c1 > c2);
588d86ed7fbStbbdev s1 = quadrant[i1];
589d86ed7fbStbbdev s2 = quadrant[i2];
590d86ed7fbStbbdev if (s1 != s2)
591d86ed7fbStbbdev return (s1 > s2);
592d86ed7fbStbbdev i1++;
593d86ed7fbStbbdev i2++;
594d86ed7fbStbbdev /* 8 */
595d86ed7fbStbbdev c1 = block[i1];
596d86ed7fbStbbdev c2 = block[i2];
597d86ed7fbStbbdev if (c1 != c2)
598d86ed7fbStbbdev return (c1 > c2);
599d86ed7fbStbbdev s1 = quadrant[i1];
600d86ed7fbStbbdev s2 = quadrant[i2];
601d86ed7fbStbbdev if (s1 != s2)
602d86ed7fbStbbdev return (s1 > s2);
603d86ed7fbStbbdev i1++;
604d86ed7fbStbbdev i2++;
605d86ed7fbStbbdev
606d86ed7fbStbbdev if (i1 >= nblock)
607d86ed7fbStbbdev i1 -= nblock;
608d86ed7fbStbbdev if (i2 >= nblock)
609d86ed7fbStbbdev i2 -= nblock;
610d86ed7fbStbbdev
611d86ed7fbStbbdev k -= 8;
612d86ed7fbStbbdev (*budget)--;
613d86ed7fbStbbdev } while (k >= 0);
614d86ed7fbStbbdev
615d86ed7fbStbbdev return False;
616d86ed7fbStbbdev }
617d86ed7fbStbbdev
618d86ed7fbStbbdev /*---------------------------------------------*/
619d86ed7fbStbbdev /*--
620d86ed7fbStbbdev Knuth's increments seem to work better
621d86ed7fbStbbdev than Incerpi-Sedgewick here. Possibly
622d86ed7fbStbbdev because the number of elements to sort
623d86ed7fbStbbdev is usually small, typically <= 20.
624d86ed7fbStbbdev --*/
625d86ed7fbStbbdev static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093,
626d86ed7fbStbbdev 3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
627d86ed7fbStbbdev
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)628d86ed7fbStbbdev static void mainSimpleSort(UInt32* ptr,
629d86ed7fbStbbdev UChar* block,
630d86ed7fbStbbdev UInt16* quadrant,
631d86ed7fbStbbdev Int32 nblock,
632d86ed7fbStbbdev Int32 lo,
633d86ed7fbStbbdev Int32 hi,
634d86ed7fbStbbdev Int32 d,
635d86ed7fbStbbdev Int32* budget) {
636d86ed7fbStbbdev Int32 i, j, h, bigN, hp;
637d86ed7fbStbbdev UInt32 v;
638d86ed7fbStbbdev
639d86ed7fbStbbdev bigN = hi - lo + 1;
640d86ed7fbStbbdev if (bigN < 2)
641d86ed7fbStbbdev return;
642d86ed7fbStbbdev
643d86ed7fbStbbdev hp = 0;
644d86ed7fbStbbdev while (incs[hp] < bigN)
645d86ed7fbStbbdev hp++;
646d86ed7fbStbbdev hp--;
647d86ed7fbStbbdev
648d86ed7fbStbbdev for (; hp >= 0; hp--) {
649d86ed7fbStbbdev h = incs[hp];
650d86ed7fbStbbdev
651d86ed7fbStbbdev i = lo + h;
652d86ed7fbStbbdev while (True) {
653d86ed7fbStbbdev /*-- copy 1 --*/
654d86ed7fbStbbdev if (i > hi)
655d86ed7fbStbbdev break;
656d86ed7fbStbbdev v = ptr[i];
657d86ed7fbStbbdev j = i;
658d86ed7fbStbbdev while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
659d86ed7fbStbbdev ptr[j] = ptr[j - h];
660d86ed7fbStbbdev j = j - h;
661d86ed7fbStbbdev if (j <= (lo + h - 1))
662d86ed7fbStbbdev break;
663d86ed7fbStbbdev }
664d86ed7fbStbbdev ptr[j] = v;
665d86ed7fbStbbdev i++;
666d86ed7fbStbbdev
667d86ed7fbStbbdev /*-- copy 2 --*/
668d86ed7fbStbbdev if (i > hi)
669d86ed7fbStbbdev break;
670d86ed7fbStbbdev v = ptr[i];
671d86ed7fbStbbdev j = i;
672d86ed7fbStbbdev while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
673d86ed7fbStbbdev ptr[j] = ptr[j - h];
674d86ed7fbStbbdev j = j - h;
675d86ed7fbStbbdev if (j <= (lo + h - 1))
676d86ed7fbStbbdev break;
677d86ed7fbStbbdev }
678d86ed7fbStbbdev ptr[j] = v;
679d86ed7fbStbbdev i++;
680d86ed7fbStbbdev
681d86ed7fbStbbdev /*-- copy 3 --*/
682d86ed7fbStbbdev if (i > hi)
683d86ed7fbStbbdev break;
684d86ed7fbStbbdev v = ptr[i];
685d86ed7fbStbbdev j = i;
686d86ed7fbStbbdev while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
687d86ed7fbStbbdev ptr[j] = ptr[j - h];
688d86ed7fbStbbdev j = j - h;
689d86ed7fbStbbdev if (j <= (lo + h - 1))
690d86ed7fbStbbdev break;
691d86ed7fbStbbdev }
692d86ed7fbStbbdev ptr[j] = v;
693d86ed7fbStbbdev i++;
694d86ed7fbStbbdev
695d86ed7fbStbbdev if (*budget < 0)
696d86ed7fbStbbdev return;
697d86ed7fbStbbdev }
698d86ed7fbStbbdev }
699d86ed7fbStbbdev }
700d86ed7fbStbbdev
701d86ed7fbStbbdev /*---------------------------------------------*/
702d86ed7fbStbbdev /*--
703d86ed7fbStbbdev The following is an implementation of
704d86ed7fbStbbdev an elegant 3-way quicksort for strings,
705d86ed7fbStbbdev described in a paper "Fast Algorithms for
706d86ed7fbStbbdev Sorting and Searching Strings", by Robert
707d86ed7fbStbbdev Sedgewick and Jon L. Bentley.
708d86ed7fbStbbdev --*/
709d86ed7fbStbbdev
710d86ed7fbStbbdev #define mswap(zz1, zz2) \
711d86ed7fbStbbdev { \
712d86ed7fbStbbdev Int32 zztmp = zz1; \
713d86ed7fbStbbdev zz1 = zz2; \
714d86ed7fbStbbdev zz2 = zztmp; \
715d86ed7fbStbbdev }
716d86ed7fbStbbdev
717d86ed7fbStbbdev #define mvswap(zzp1, zzp2, zzn) \
718d86ed7fbStbbdev { \
719d86ed7fbStbbdev Int32 yyp1 = (zzp1); \
720d86ed7fbStbbdev Int32 yyp2 = (zzp2); \
721d86ed7fbStbbdev Int32 yyn = (zzn); \
722d86ed7fbStbbdev while (yyn > 0) { \
723d86ed7fbStbbdev mswap(ptr[yyp1], ptr[yyp2]); \
724d86ed7fbStbbdev yyp1++; \
725d86ed7fbStbbdev yyp2++; \
726d86ed7fbStbbdev yyn--; \
727d86ed7fbStbbdev } \
728d86ed7fbStbbdev }
729d86ed7fbStbbdev
mmed3(UChar a,UChar b,UChar c)730d86ed7fbStbbdev static __inline__ UChar mmed3(UChar a, UChar b, UChar c) {
731d86ed7fbStbbdev UChar t;
732d86ed7fbStbbdev if (a > b) {
733d86ed7fbStbbdev t = a;
734d86ed7fbStbbdev a = b;
735d86ed7fbStbbdev b = t;
736d86ed7fbStbbdev };
737d86ed7fbStbbdev if (b > c) {
738d86ed7fbStbbdev b = c;
739d86ed7fbStbbdev if (a > b)
740d86ed7fbStbbdev b = a;
741d86ed7fbStbbdev }
742d86ed7fbStbbdev return b;
743d86ed7fbStbbdev }
744d86ed7fbStbbdev
745d86ed7fbStbbdev #define mmin(a, b) ((a) < (b)) ? (a) : (b)
746d86ed7fbStbbdev
747d86ed7fbStbbdev #define mpush(lz, hz, dz) \
748d86ed7fbStbbdev { \
749d86ed7fbStbbdev stackLo[sp] = lz; \
750d86ed7fbStbbdev stackHi[sp] = hz; \
751d86ed7fbStbbdev stackD[sp] = dz; \
752d86ed7fbStbbdev sp++; \
753d86ed7fbStbbdev }
754d86ed7fbStbbdev
755d86ed7fbStbbdev #define mpop(lz, hz, dz) \
756d86ed7fbStbbdev { \
757d86ed7fbStbbdev sp--; \
758d86ed7fbStbbdev lz = stackLo[sp]; \
759d86ed7fbStbbdev hz = stackHi[sp]; \
760d86ed7fbStbbdev dz = stackD[sp]; \
761d86ed7fbStbbdev }
762d86ed7fbStbbdev
763d86ed7fbStbbdev #define mnextsize(az) (nextHi[az] - nextLo[az])
764d86ed7fbStbbdev
765d86ed7fbStbbdev #define mnextswap(az, bz) \
766d86ed7fbStbbdev { \
767d86ed7fbStbbdev Int32 tz; \
768d86ed7fbStbbdev tz = nextLo[az]; \
769d86ed7fbStbbdev nextLo[az] = nextLo[bz]; \
770d86ed7fbStbbdev nextLo[bz] = tz; \
771d86ed7fbStbbdev tz = nextHi[az]; \
772d86ed7fbStbbdev nextHi[az] = nextHi[bz]; \
773d86ed7fbStbbdev nextHi[bz] = tz; \
774d86ed7fbStbbdev tz = nextD[az]; \
775d86ed7fbStbbdev nextD[az] = nextD[bz]; \
776d86ed7fbStbbdev nextD[bz] = tz; \
777d86ed7fbStbbdev }
778d86ed7fbStbbdev
779d86ed7fbStbbdev #define MAIN_QSORT_SMALL_THRESH 20
780d86ed7fbStbbdev #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
781d86ed7fbStbbdev #define MAIN_QSORT_STACK_SIZE 100
782d86ed7fbStbbdev
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)783d86ed7fbStbbdev static void mainQSort3(UInt32* ptr,
784d86ed7fbStbbdev UChar* block,
785d86ed7fbStbbdev UInt16* quadrant,
786d86ed7fbStbbdev Int32 nblock,
787d86ed7fbStbbdev Int32 loSt,
788d86ed7fbStbbdev Int32 hiSt,
789d86ed7fbStbbdev Int32 dSt,
790d86ed7fbStbbdev Int32* budget) {
791d86ed7fbStbbdev Int32 unLo, unHi, ltLo, gtHi, n, m, med;
792d86ed7fbStbbdev Int32 sp, lo, hi, d;
793d86ed7fbStbbdev
794d86ed7fbStbbdev Int32 stackLo[MAIN_QSORT_STACK_SIZE];
795d86ed7fbStbbdev Int32 stackHi[MAIN_QSORT_STACK_SIZE];
796d86ed7fbStbbdev Int32 stackD[MAIN_QSORT_STACK_SIZE];
797d86ed7fbStbbdev
798d86ed7fbStbbdev Int32 nextLo[3];
799d86ed7fbStbbdev Int32 nextHi[3];
800d86ed7fbStbbdev Int32 nextD[3];
801d86ed7fbStbbdev
802d86ed7fbStbbdev sp = 0;
803d86ed7fbStbbdev mpush(loSt, hiSt, dSt);
804d86ed7fbStbbdev
805d86ed7fbStbbdev while (sp > 0) {
806d86ed7fbStbbdev AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
807d86ed7fbStbbdev
808d86ed7fbStbbdev mpop(lo, hi, d);
809d86ed7fbStbbdev if (hi - lo < MAIN_QSORT_SMALL_THRESH || d > MAIN_QSORT_DEPTH_THRESH) {
810d86ed7fbStbbdev mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
811d86ed7fbStbbdev if (*budget < 0)
812d86ed7fbStbbdev return;
813d86ed7fbStbbdev continue;
814d86ed7fbStbbdev }
815d86ed7fbStbbdev
816d86ed7fbStbbdev med = (Int32)mmed3(block[ptr[lo] + d], block[ptr[hi] + d], block[ptr[(lo + hi) >> 1] + d]);
817d86ed7fbStbbdev
818d86ed7fbStbbdev unLo = ltLo = lo;
819d86ed7fbStbbdev unHi = gtHi = hi;
820d86ed7fbStbbdev
821d86ed7fbStbbdev while (True) {
822d86ed7fbStbbdev while (True) {
823d86ed7fbStbbdev if (unLo > unHi)
824d86ed7fbStbbdev break;
825d86ed7fbStbbdev n = ((Int32)block[ptr[unLo] + d]) - med;
826d86ed7fbStbbdev if (n == 0) {
827d86ed7fbStbbdev mswap(ptr[unLo], ptr[ltLo]);
828d86ed7fbStbbdev ltLo++;
829d86ed7fbStbbdev unLo++;
830d86ed7fbStbbdev continue;
831d86ed7fbStbbdev };
832d86ed7fbStbbdev if (n > 0)
833d86ed7fbStbbdev break;
834d86ed7fbStbbdev unLo++;
835d86ed7fbStbbdev }
836d86ed7fbStbbdev while (True) {
837d86ed7fbStbbdev if (unLo > unHi)
838d86ed7fbStbbdev break;
839d86ed7fbStbbdev n = ((Int32)block[ptr[unHi] + d]) - med;
840d86ed7fbStbbdev if (n == 0) {
841d86ed7fbStbbdev mswap(ptr[unHi], ptr[gtHi]);
842d86ed7fbStbbdev gtHi--;
843d86ed7fbStbbdev unHi--;
844d86ed7fbStbbdev continue;
845d86ed7fbStbbdev };
846d86ed7fbStbbdev if (n < 0)
847d86ed7fbStbbdev break;
848d86ed7fbStbbdev unHi--;
849d86ed7fbStbbdev }
850d86ed7fbStbbdev if (unLo > unHi)
851d86ed7fbStbbdev break;
852d86ed7fbStbbdev mswap(ptr[unLo], ptr[unHi]);
853d86ed7fbStbbdev unLo++;
854d86ed7fbStbbdev unHi--;
855d86ed7fbStbbdev }
856d86ed7fbStbbdev
857d86ed7fbStbbdev AssertD(unHi == unLo - 1, "mainQSort3(2)");
858d86ed7fbStbbdev
859d86ed7fbStbbdev if (gtHi < ltLo) {
860d86ed7fbStbbdev mpush(lo, hi, d + 1);
861d86ed7fbStbbdev continue;
862d86ed7fbStbbdev }
863d86ed7fbStbbdev
864d86ed7fbStbbdev n = mmin(ltLo - lo, unLo - ltLo);
865d86ed7fbStbbdev mvswap(lo, unLo - n, n);
866d86ed7fbStbbdev m = mmin(hi - gtHi, gtHi - unHi);
867d86ed7fbStbbdev mvswap(unLo, hi - m + 1, m);
868d86ed7fbStbbdev
869d86ed7fbStbbdev n = lo + unLo - ltLo - 1;
870d86ed7fbStbbdev m = hi - (gtHi - unHi) + 1;
871d86ed7fbStbbdev
872d86ed7fbStbbdev nextLo[0] = lo;
873d86ed7fbStbbdev nextHi[0] = n;
874d86ed7fbStbbdev nextD[0] = d;
875d86ed7fbStbbdev nextLo[1] = m;
876d86ed7fbStbbdev nextHi[1] = hi;
877d86ed7fbStbbdev nextD[1] = d;
878d86ed7fbStbbdev nextLo[2] = n + 1;
879d86ed7fbStbbdev nextHi[2] = m - 1;
880d86ed7fbStbbdev nextD[2] = d + 1;
881d86ed7fbStbbdev
882d86ed7fbStbbdev if (mnextsize(0) < mnextsize(1))
883d86ed7fbStbbdev mnextswap(0, 1);
884d86ed7fbStbbdev if (mnextsize(1) < mnextsize(2))
885d86ed7fbStbbdev mnextswap(1, 2);
886d86ed7fbStbbdev if (mnextsize(0) < mnextsize(1))
887d86ed7fbStbbdev mnextswap(0, 1);
888d86ed7fbStbbdev
889d86ed7fbStbbdev AssertD(mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
890d86ed7fbStbbdev AssertD(mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
891d86ed7fbStbbdev
892d86ed7fbStbbdev mpush(nextLo[0], nextHi[0], nextD[0]);
893d86ed7fbStbbdev mpush(nextLo[1], nextHi[1], nextD[1]);
894d86ed7fbStbbdev mpush(nextLo[2], nextHi[2], nextD[2]);
895d86ed7fbStbbdev }
896d86ed7fbStbbdev }
897d86ed7fbStbbdev
898d86ed7fbStbbdev #undef mswap
899d86ed7fbStbbdev #undef mvswap
900d86ed7fbStbbdev #undef mpush
901d86ed7fbStbbdev #undef mpop
902d86ed7fbStbbdev #undef mmin
903d86ed7fbStbbdev #undef mnextsize
904d86ed7fbStbbdev #undef mnextswap
905d86ed7fbStbbdev #undef MAIN_QSORT_SMALL_THRESH
906d86ed7fbStbbdev #undef MAIN_QSORT_DEPTH_THRESH
907d86ed7fbStbbdev #undef MAIN_QSORT_STACK_SIZE
908d86ed7fbStbbdev
909d86ed7fbStbbdev /*---------------------------------------------*/
910d86ed7fbStbbdev /* Pre:
911d86ed7fbStbbdev nblock > N_OVERSHOOT
912d86ed7fbStbbdev block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
913d86ed7fbStbbdev ((UChar*)block32) [0 .. nblock-1] holds block
914d86ed7fbStbbdev ptr exists for [0 .. nblock-1]
915d86ed7fbStbbdev
916d86ed7fbStbbdev Post:
917d86ed7fbStbbdev ((UChar*)block32) [0 .. nblock-1] holds block
918d86ed7fbStbbdev All other areas of block32 destroyed
919d86ed7fbStbbdev ftab [0 .. 65536 ] destroyed
920d86ed7fbStbbdev ptr [0 .. nblock-1] holds sorted order
921d86ed7fbStbbdev if (*budget < 0), sorting was abandoned
922d86ed7fbStbbdev */
923d86ed7fbStbbdev
924d86ed7fbStbbdev #define BIGFREQ(b) (ftab[((b) + 1) << 8] - ftab[(b) << 8])
925d86ed7fbStbbdev #define SETMASK (1 << 21)
926d86ed7fbStbbdev #define CLEARMASK (~(SETMASK))
927d86ed7fbStbbdev
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)928d86ed7fbStbbdev static void mainSort(UInt32* ptr,
929d86ed7fbStbbdev UChar* block,
930d86ed7fbStbbdev UInt16* quadrant,
931d86ed7fbStbbdev UInt32* ftab,
932d86ed7fbStbbdev Int32 nblock,
933d86ed7fbStbbdev Int32 verb,
934d86ed7fbStbbdev Int32* budget) {
935d86ed7fbStbbdev Int32 i, j, k, ss, sb;
936d86ed7fbStbbdev Int32 runningOrder[256];
937d86ed7fbStbbdev Bool bigDone[256];
938d86ed7fbStbbdev Int32 copyStart[256];
939d86ed7fbStbbdev Int32 copyEnd[256];
940d86ed7fbStbbdev UChar c1;
941d86ed7fbStbbdev Int32 numQSorted;
942d86ed7fbStbbdev UInt16 s;
943d86ed7fbStbbdev if (verb >= 4)
944d86ed7fbStbbdev VPrintf0(" main sort initialise ...\n");
945d86ed7fbStbbdev
946d86ed7fbStbbdev /*-- set up the 2-byte frequency table --*/
947d86ed7fbStbbdev for (i = 65536; i >= 0; i--)
948d86ed7fbStbbdev ftab[i] = 0;
949d86ed7fbStbbdev
950d86ed7fbStbbdev j = block[0] << 8;
951d86ed7fbStbbdev i = nblock - 1;
952d86ed7fbStbbdev for (; i >= 3; i -= 4) {
953d86ed7fbStbbdev quadrant[i] = 0;
954d86ed7fbStbbdev j = (j >> 8) | (((UInt16)block[i]) << 8);
955d86ed7fbStbbdev ftab[j]++;
956d86ed7fbStbbdev quadrant[i - 1] = 0;
957d86ed7fbStbbdev j = (j >> 8) | (((UInt16)block[i - 1]) << 8);
958d86ed7fbStbbdev ftab[j]++;
959d86ed7fbStbbdev quadrant[i - 2] = 0;
960d86ed7fbStbbdev j = (j >> 8) | (((UInt16)block[i - 2]) << 8);
961d86ed7fbStbbdev ftab[j]++;
962d86ed7fbStbbdev quadrant[i - 3] = 0;
963d86ed7fbStbbdev j = (j >> 8) | (((UInt16)block[i - 3]) << 8);
964d86ed7fbStbbdev ftab[j]++;
965d86ed7fbStbbdev }
966d86ed7fbStbbdev for (; i >= 0; i--) {
967d86ed7fbStbbdev quadrant[i] = 0;
968d86ed7fbStbbdev j = (j >> 8) | (((UInt16)block[i]) << 8);
969d86ed7fbStbbdev ftab[j]++;
970d86ed7fbStbbdev }
971d86ed7fbStbbdev
972d86ed7fbStbbdev /*-- (emphasises close relationship of block & quadrant) --*/
973d86ed7fbStbbdev for (i = 0; i < BZ_N_OVERSHOOT; i++) {
974d86ed7fbStbbdev block[nblock + i] = block[i];
975d86ed7fbStbbdev quadrant[nblock + i] = 0;
976d86ed7fbStbbdev }
977d86ed7fbStbbdev
978d86ed7fbStbbdev if (verb >= 4)
979d86ed7fbStbbdev VPrintf0(" bucket sorting ...\n");
980d86ed7fbStbbdev
981d86ed7fbStbbdev /*-- Complete the initial radix sort --*/
982d86ed7fbStbbdev for (i = 1; i <= 65536; i++)
983d86ed7fbStbbdev ftab[i] += ftab[i - 1];
984d86ed7fbStbbdev
985d86ed7fbStbbdev s = block[0] << 8;
986d86ed7fbStbbdev i = nblock - 1;
987d86ed7fbStbbdev for (; i >= 3; i -= 4) {
988d86ed7fbStbbdev s = (s >> 8) | (block[i] << 8);
989d86ed7fbStbbdev j = ftab[s] - 1;
990d86ed7fbStbbdev ftab[s] = j;
991d86ed7fbStbbdev ptr[j] = i;
992d86ed7fbStbbdev s = (s >> 8) | (block[i - 1] << 8);
993d86ed7fbStbbdev j = ftab[s] - 1;
994d86ed7fbStbbdev ftab[s] = j;
995d86ed7fbStbbdev ptr[j] = i - 1;
996d86ed7fbStbbdev s = (s >> 8) | (block[i - 2] << 8);
997d86ed7fbStbbdev j = ftab[s] - 1;
998d86ed7fbStbbdev ftab[s] = j;
999d86ed7fbStbbdev ptr[j] = i - 2;
1000d86ed7fbStbbdev s = (s >> 8) | (block[i - 3] << 8);
1001d86ed7fbStbbdev j = ftab[s] - 1;
1002d86ed7fbStbbdev ftab[s] = j;
1003d86ed7fbStbbdev ptr[j] = i - 3;
1004d86ed7fbStbbdev }
1005d86ed7fbStbbdev for (; i >= 0; i--) {
1006d86ed7fbStbbdev s = (s >> 8) | (block[i] << 8);
1007d86ed7fbStbbdev j = ftab[s] - 1;
1008d86ed7fbStbbdev ftab[s] = j;
1009d86ed7fbStbbdev ptr[j] = i;
1010d86ed7fbStbbdev }
1011d86ed7fbStbbdev
1012d86ed7fbStbbdev /*--
1013d86ed7fbStbbdev Now ftab contains the first loc of every small bucket.
1014d86ed7fbStbbdev Calculate the running order, from smallest to largest
1015d86ed7fbStbbdev big bucket.
1016d86ed7fbStbbdev --*/
1017d86ed7fbStbbdev for (i = 0; i <= 255; i++) {
1018d86ed7fbStbbdev bigDone[i] = False;
1019d86ed7fbStbbdev runningOrder[i] = i;
1020d86ed7fbStbbdev }
1021d86ed7fbStbbdev
1022d86ed7fbStbbdev {
1023d86ed7fbStbbdev Int32 vv;
1024d86ed7fbStbbdev Int32 h = 1;
1025d86ed7fbStbbdev do
1026d86ed7fbStbbdev h = 3 * h + 1;
1027d86ed7fbStbbdev while (h <= 256);
1028d86ed7fbStbbdev do {
1029d86ed7fbStbbdev h = h / 3;
1030d86ed7fbStbbdev for (i = h; i <= 255; i++) {
1031d86ed7fbStbbdev vv = runningOrder[i];
1032d86ed7fbStbbdev j = i;
1033d86ed7fbStbbdev while (BIGFREQ(runningOrder[j - h]) > BIGFREQ(vv)) {
1034d86ed7fbStbbdev runningOrder[j] = runningOrder[j - h];
1035d86ed7fbStbbdev j = j - h;
1036d86ed7fbStbbdev if (j <= (h - 1))
1037d86ed7fbStbbdev goto zero;
1038d86ed7fbStbbdev }
1039d86ed7fbStbbdev zero:
1040d86ed7fbStbbdev runningOrder[j] = vv;
1041d86ed7fbStbbdev }
1042d86ed7fbStbbdev } while (h != 1);
1043d86ed7fbStbbdev }
1044d86ed7fbStbbdev
1045d86ed7fbStbbdev /*--
1046d86ed7fbStbbdev The main sorting loop.
1047d86ed7fbStbbdev --*/
1048d86ed7fbStbbdev
1049d86ed7fbStbbdev numQSorted = 0;
1050d86ed7fbStbbdev
1051d86ed7fbStbbdev for (i = 0; i <= 255; i++) {
1052d86ed7fbStbbdev /*--
1053d86ed7fbStbbdev Process big buckets, starting with the least full.
1054d86ed7fbStbbdev Basically this is a 3-step process in which we call
1055d86ed7fbStbbdev mainQSort3 to sort the small buckets [ss, j], but
1056d86ed7fbStbbdev also make a big effort to avoid the calls if we can.
1057d86ed7fbStbbdev --*/
1058d86ed7fbStbbdev ss = runningOrder[i];
1059d86ed7fbStbbdev
1060d86ed7fbStbbdev /*--
1061d86ed7fbStbbdev Step 1:
1062d86ed7fbStbbdev Complete the big bucket [ss] by quicksorting
1063d86ed7fbStbbdev any unsorted small buckets [ss, j], for j != ss.
1064d86ed7fbStbbdev Hopefully previous pointer-scanning phases have already
1065d86ed7fbStbbdev completed many of the small buckets [ss, j], so
1066d86ed7fbStbbdev we don't have to sort them at all.
1067d86ed7fbStbbdev --*/
1068d86ed7fbStbbdev for (j = 0; j <= 255; j++) {
1069d86ed7fbStbbdev if (j != ss) {
1070d86ed7fbStbbdev sb = (ss << 8) + j;
1071d86ed7fbStbbdev if (!(ftab[sb] & SETMASK)) {
1072d86ed7fbStbbdev Int32 lo = ftab[sb] & CLEARMASK;
1073d86ed7fbStbbdev Int32 hi = (ftab[sb + 1] & CLEARMASK) - 1;
1074d86ed7fbStbbdev if (hi > lo) {
1075d86ed7fbStbbdev if (verb >= 4)
1076d86ed7fbStbbdev VPrintf4(" qsort [0x%x, 0x%x] "
1077d86ed7fbStbbdev "done %d this %d\n",
1078d86ed7fbStbbdev ss,
1079d86ed7fbStbbdev j,
1080d86ed7fbStbbdev numQSorted,
1081d86ed7fbStbbdev hi - lo + 1);
1082d86ed7fbStbbdev mainQSort3(ptr, block, quadrant, nblock, lo, hi, BZ_N_RADIX, budget);
1083d86ed7fbStbbdev numQSorted += (hi - lo + 1);
1084d86ed7fbStbbdev if (*budget < 0)
1085d86ed7fbStbbdev return;
1086d86ed7fbStbbdev }
1087d86ed7fbStbbdev }
1088d86ed7fbStbbdev ftab[sb] |= SETMASK;
1089d86ed7fbStbbdev }
1090d86ed7fbStbbdev }
1091d86ed7fbStbbdev
1092d86ed7fbStbbdev AssertH(!bigDone[ss], 1006);
1093d86ed7fbStbbdev
1094d86ed7fbStbbdev /*--
1095d86ed7fbStbbdev Step 2:
1096d86ed7fbStbbdev Now scan this big bucket [ss] so as to synthesise the
1097d86ed7fbStbbdev sorted order for small buckets [t, ss] for all t,
1098d86ed7fbStbbdev including, magically, the bucket [ss,ss] too.
1099d86ed7fbStbbdev This will avoid doing Real Work in subsequent Step 1's.
1100d86ed7fbStbbdev --*/
1101d86ed7fbStbbdev {
1102d86ed7fbStbbdev for (j = 0; j <= 255; j++) {
1103d86ed7fbStbbdev copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
1104d86ed7fbStbbdev copyEnd[j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
1105d86ed7fbStbbdev }
1106d86ed7fbStbbdev for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
1107d86ed7fbStbbdev k = ptr[j] - 1;
1108d86ed7fbStbbdev if (k < 0)
1109d86ed7fbStbbdev k += nblock;
1110d86ed7fbStbbdev c1 = block[k];
1111d86ed7fbStbbdev if (!bigDone[c1])
1112d86ed7fbStbbdev ptr[copyStart[c1]++] = k;
1113d86ed7fbStbbdev }
1114d86ed7fbStbbdev for (j = (ftab[(ss + 1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
1115d86ed7fbStbbdev k = ptr[j] - 1;
1116d86ed7fbStbbdev if (k < 0)
1117d86ed7fbStbbdev k += nblock;
1118d86ed7fbStbbdev c1 = block[k];
1119d86ed7fbStbbdev if (!bigDone[c1])
1120d86ed7fbStbbdev ptr[copyEnd[c1]--] = k;
1121d86ed7fbStbbdev }
1122d86ed7fbStbbdev }
1123d86ed7fbStbbdev
1124d86ed7fbStbbdev AssertH((copyStart[ss] - 1 == copyEnd[ss]) ||
1125d86ed7fbStbbdev /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
1126d86ed7fbStbbdev Necessity for this case is demonstrated by compressing
1127d86ed7fbStbbdev a sequence of approximately 48.5 million of character
1128d86ed7fbStbbdev 251; 1.0.0/1.0.1 will then die here. */
1129d86ed7fbStbbdev (copyStart[ss] == 0 && copyEnd[ss] == nblock - 1),
1130d86ed7fbStbbdev 1007)
1131d86ed7fbStbbdev
1132d86ed7fbStbbdev for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
1133d86ed7fbStbbdev
1134d86ed7fbStbbdev /*--
1135d86ed7fbStbbdev Step 3:
1136d86ed7fbStbbdev The [ss] big bucket is now done. Record this fact,
1137d86ed7fbStbbdev and update the quadrant descriptors. Remember to
1138d86ed7fbStbbdev update quadrants in the overshoot area too, if
1139d86ed7fbStbbdev necessary. The "if (i < 255)" test merely skips
1140d86ed7fbStbbdev this updating for the last bucket processed, since
1141d86ed7fbStbbdev updating for the last bucket is pointless.
1142d86ed7fbStbbdev
1143d86ed7fbStbbdev The quadrant array provides a way to incrementally
1144d86ed7fbStbbdev cache sort orderings, as they appear, so as to
1145d86ed7fbStbbdev make subsequent comparisons in fullGtU() complete
1146d86ed7fbStbbdev faster. For repetitive blocks this makes a big
1147d86ed7fbStbbdev difference (but not big enough to be able to avoid
1148d86ed7fbStbbdev the fallback sorting mechanism, exponential radix sort).
1149d86ed7fbStbbdev
1150d86ed7fbStbbdev The precise meaning is: at all times:
1151d86ed7fbStbbdev
1152d86ed7fbStbbdev for 0 <= i < nblock and 0 <= j <= nblock
1153d86ed7fbStbbdev
1154d86ed7fbStbbdev if block[i] != block[j],
1155d86ed7fbStbbdev
1156d86ed7fbStbbdev then the relative values of quadrant[i] and
1157d86ed7fbStbbdev quadrant[j] are meaningless.
1158d86ed7fbStbbdev
1159d86ed7fbStbbdev else {
1160d86ed7fbStbbdev if quadrant[i] < quadrant[j]
1161d86ed7fbStbbdev then the string starting at i lexicographically
1162d86ed7fbStbbdev precedes the string starting at j
1163d86ed7fbStbbdev
1164d86ed7fbStbbdev else if quadrant[i] > quadrant[j]
1165d86ed7fbStbbdev then the string starting at j lexicographically
1166d86ed7fbStbbdev precedes the string starting at i
1167d86ed7fbStbbdev
1168d86ed7fbStbbdev else
1169d86ed7fbStbbdev the relative ordering of the strings starting
1170d86ed7fbStbbdev at i and j has not yet been determined.
1171d86ed7fbStbbdev }
1172d86ed7fbStbbdev --*/
1173d86ed7fbStbbdev bigDone[ss] = True;
1174d86ed7fbStbbdev
1175d86ed7fbStbbdev if (i < 255) {
1176d86ed7fbStbbdev Int32 bbStart = ftab[ss << 8] & CLEARMASK;
1177d86ed7fbStbbdev Int32 bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
1178d86ed7fbStbbdev Int32 shifts = 0;
1179d86ed7fbStbbdev
1180d86ed7fbStbbdev while ((bbSize >> shifts) > 65534)
1181d86ed7fbStbbdev shifts++;
1182d86ed7fbStbbdev
1183d86ed7fbStbbdev for (j = bbSize - 1; j >= 0; j--) {
1184d86ed7fbStbbdev Int32 a2update = ptr[bbStart + j];
1185d86ed7fbStbbdev UInt16 qVal = (UInt16)(j >> shifts);
1186d86ed7fbStbbdev quadrant[a2update] = qVal;
1187d86ed7fbStbbdev if (a2update < BZ_N_OVERSHOOT)
1188d86ed7fbStbbdev quadrant[a2update + nblock] = qVal;
1189d86ed7fbStbbdev }
1190d86ed7fbStbbdev AssertH(((bbSize - 1) >> shifts) <= 65535, 1002);
1191d86ed7fbStbbdev }
1192d86ed7fbStbbdev }
1193d86ed7fbStbbdev
1194d86ed7fbStbbdev if (verb >= 4)
1195d86ed7fbStbbdev VPrintf3(" %d pointers, %d sorted, %d scanned\n",
1196d86ed7fbStbbdev nblock,
1197d86ed7fbStbbdev numQSorted,
1198d86ed7fbStbbdev nblock - numQSorted);
1199d86ed7fbStbbdev }
1200d86ed7fbStbbdev
1201d86ed7fbStbbdev #undef BIGFREQ
1202d86ed7fbStbbdev #undef SETMASK
1203d86ed7fbStbbdev #undef CLEARMASK
1204d86ed7fbStbbdev
1205d86ed7fbStbbdev /*---------------------------------------------*/
1206d86ed7fbStbbdev /* Pre:
1207d86ed7fbStbbdev nblock > 0
1208d86ed7fbStbbdev arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1209d86ed7fbStbbdev ((UChar*)arr2) [0 .. nblock-1] holds block
1210d86ed7fbStbbdev arr1 exists for [0 .. nblock-1]
1211d86ed7fbStbbdev
1212d86ed7fbStbbdev Post:
1213d86ed7fbStbbdev ((UChar*)arr2) [0 .. nblock-1] holds block
1214d86ed7fbStbbdev All other areas of block destroyed
1215d86ed7fbStbbdev ftab [ 0 .. 65536 ] destroyed
1216d86ed7fbStbbdev arr1 [0 .. nblock-1] holds sorted order
1217d86ed7fbStbbdev */
BZ2_blockSort(EState * s)1218d86ed7fbStbbdev void BZ2_blockSort(EState* s) {
1219d86ed7fbStbbdev UInt32* ptr = s->ptr;
1220d86ed7fbStbbdev UChar* block = s->block;
1221d86ed7fbStbbdev UInt32* ftab = s->ftab;
1222d86ed7fbStbbdev Int32 nblock = s->nblock;
1223d86ed7fbStbbdev Int32 verb = s->verbosity;
1224d86ed7fbStbbdev Int32 wfact = s->workFactor;
1225d86ed7fbStbbdev UInt16* quadrant;
1226d86ed7fbStbbdev Int32 budget;
1227d86ed7fbStbbdev Int32 budgetInit;
1228d86ed7fbStbbdev Int32 i;
1229d86ed7fbStbbdev
1230d86ed7fbStbbdev if (nblock < 10000) {
1231d86ed7fbStbbdev fallbackSort(s->arr1, s->arr2, ftab, nblock, verb);
1232d86ed7fbStbbdev }
1233d86ed7fbStbbdev else {
1234d86ed7fbStbbdev /* Calculate the location for quadrant, remembering to get
1235d86ed7fbStbbdev the alignment right. Assumes that &(block[0]) is at least
1236d86ed7fbStbbdev 2-byte aligned -- this should be ok since block is really
1237d86ed7fbStbbdev the first section of arr2.
1238d86ed7fbStbbdev */
1239d86ed7fbStbbdev i = nblock + BZ_N_OVERSHOOT;
1240d86ed7fbStbbdev if (i & 1)
1241d86ed7fbStbbdev i++;
1242d86ed7fbStbbdev quadrant = (UInt16*)(&(block[i]));
1243d86ed7fbStbbdev
1244d86ed7fbStbbdev /* (wfact-1) / 3 puts the default-factor-30
1245d86ed7fbStbbdev transition point at very roughly the same place as
1246d86ed7fbStbbdev with v0.1 and v0.9.0.
1247d86ed7fbStbbdev Not that it particularly matters any more, since the
1248d86ed7fbStbbdev resulting compressed stream is now the same regardless
1249d86ed7fbStbbdev of whether or not we use the main sort or fallback sort.
1250d86ed7fbStbbdev */
1251d86ed7fbStbbdev if (wfact < 1)
1252d86ed7fbStbbdev wfact = 1;
1253d86ed7fbStbbdev if (wfact > 100)
1254d86ed7fbStbbdev wfact = 100;
1255d86ed7fbStbbdev budgetInit = nblock * ((wfact - 1) / 3);
1256d86ed7fbStbbdev budget = budgetInit;
1257d86ed7fbStbbdev
1258d86ed7fbStbbdev mainSort(ptr, block, quadrant, ftab, nblock, verb, &budget);
1259d86ed7fbStbbdev if (verb >= 3)
1260d86ed7fbStbbdev VPrintf3(" %d work, %d block, ratio %5.2f\n",
1261d86ed7fbStbbdev budgetInit - budget,
1262d86ed7fbStbbdev nblock,
1263d86ed7fbStbbdev (float)(budgetInit - budget) / (float)(nblock == 0 ? 1 : nblock));
1264d86ed7fbStbbdev if (budget < 0) {
1265d86ed7fbStbbdev if (verb >= 2)
1266d86ed7fbStbbdev VPrintf0(" too repetitive; using fallback"
1267d86ed7fbStbbdev " sorting algorithm\n");
1268d86ed7fbStbbdev fallbackSort(s->arr1, s->arr2, ftab, nblock, verb);
1269d86ed7fbStbbdev }
1270d86ed7fbStbbdev }
1271d86ed7fbStbbdev
1272d86ed7fbStbbdev s->origPtr = -1;
1273d86ed7fbStbbdev for (i = 0; i < s->nblock; i++)
1274d86ed7fbStbbdev if (ptr[i] == 0) {
1275d86ed7fbStbbdev s->origPtr = i;
1276d86ed7fbStbbdev break;
1277d86ed7fbStbbdev };
1278d86ed7fbStbbdev
1279d86ed7fbStbbdev AssertH(s->origPtr != -1, 1003);
1280d86ed7fbStbbdev }
1281d86ed7fbStbbdev
1282d86ed7fbStbbdev /*-------------------------------------------------------------*/
1283d86ed7fbStbbdev /*--- end blocksort.c ---*/
1284d86ed7fbStbbdev /*-------------------------------------------------------------*/
1285