1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 /*-------------------------------------------------------------*/
18 /*--- Block sorting machinery ---*/
19 /*--- blocksort.cpp ---*/
20 /*-------------------------------------------------------------*/
21
22 /* ------------------------------------------------------------------
23 The original source for this example:
24 This file is part of bzip2/libbzip2, a program and library for
25 lossless, block-sorting data compression.
26
27 bzip2/libbzip2 version 1.0.6 of 6 September 2010
28 Copyright (C) 1996-2010 Julian Seward <[email protected]>
29
30 This program, "bzip2", the associated library "libbzip2", and all
31 documentation, are copyright (C) 1996-2010 Julian R Seward. All
32 rights reserved.
33
34 Redistribution and use in source and binary forms, with or without
35 modification, are permitted provided that the following conditions
36 are met:
37
38 1. Redistributions of source code must retain the above copyright
39 notice, this list of conditions and the following disclaimer.
40
41 2. The origin of this software must not be misrepresented; you must
42 not claim that you wrote the original software. If you use this
43 software in a product, an acknowledgment in the product
44 documentation would be appreciated but is not required.
45
46 3. Altered source versions must be plainly marked as such, and must
47 not be misrepresented as being the original software.
48
49 4. The name of the author may not be used to endorse or promote
50 products derived from this software without specific prior written
51 permission.
52
53 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
54 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
57 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
59 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64
65 Julian Seward, [email protected]
66 bzip2/libbzip2 version 1.0.6 of 6 September 2010
67 ------------------------------------------------------------------ */
68
69 #include "bzlib_private.hpp"
70
71 /*---------------------------------------------*/
72 /*--- Fallback O(N log(N)^2) sorting ---*/
73 /*--- algorithm, for repetitive blocks ---*/
74 /*---------------------------------------------*/
75
76 /*---------------------------------------------*/
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)77 static __inline__ void fallbackSimpleSort(UInt32* fmap, UInt32* eclass, Int32 lo, Int32 hi) {
78 Int32 i, j, tmp;
79 UInt32 ec_tmp;
80
81 if (lo == hi)
82 return;
83
84 if (hi - lo > 3) {
85 for (i = hi - 4; i >= lo; i--) {
86 tmp = fmap[i];
87 ec_tmp = eclass[tmp];
88 for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
89 fmap[j - 4] = fmap[j];
90 fmap[j - 4] = tmp;
91 }
92 }
93
94 for (i = hi - 1; i >= lo; i--) {
95 tmp = fmap[i];
96 ec_tmp = eclass[tmp];
97 for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
98 fmap[j - 1] = fmap[j];
99 fmap[j - 1] = tmp;
100 }
101 }
102
103 /*---------------------------------------------*/
104 #define fswap(zz1, zz2) \
105 { \
106 Int32 zztmp = zz1; \
107 zz1 = zz2; \
108 zz2 = zztmp; \
109 }
110
111 #define fvswap(zzp1, zzp2, zzn) \
112 { \
113 Int32 yyp1 = (zzp1); \
114 Int32 yyp2 = (zzp2); \
115 Int32 yyn = (zzn); \
116 while (yyn > 0) { \
117 fswap(fmap[yyp1], fmap[yyp2]); \
118 yyp1++; \
119 yyp2++; \
120 yyn--; \
121 } \
122 }
123
124 #define fmin(a, b) ((a) < (b)) ? (a) : (b)
125
126 #define fpush(lz, hz) \
127 { \
128 stackLo[sp] = lz; \
129 stackHi[sp] = hz; \
130 sp++; \
131 }
132
133 #define fpop(lz, hz) \
134 { \
135 sp--; \
136 lz = stackLo[sp]; \
137 hz = stackHi[sp]; \
138 }
139
140 #define FALLBACK_QSORT_SMALL_THRESH 10
141 #define FALLBACK_QSORT_STACK_SIZE 100
142
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)143 static void fallbackQSort3(UInt32* fmap, UInt32* eclass, Int32 loSt, Int32 hiSt) {
144 Int32 unLo, unHi, ltLo, gtHi, n, m;
145 Int32 sp, lo, hi;
146 UInt32 med, r, r3;
147 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
148 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
149
150 r = 0;
151
152 sp = 0;
153 fpush(loSt, hiSt);
154
155 while (sp > 0) {
156 AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);
157
158 fpop(lo, hi);
159 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
160 fallbackSimpleSort(fmap, eclass, lo, hi);
161 continue;
162 }
163
164 /* Random partitioning. Median of 3 sometimes fails to
165 avoid bad cases. Median of 9 seems to help but
166 looks rather expensive. This too seems to work but
167 is cheaper. Guidance for the magic constants
168 7621 and 32768 is taken from Sedgewick's algorithms
169 book, chapter 35.
170 */
171 r = ((r * 7621) + 1) % 32768;
172 r3 = r % 3;
173 if (r3 == 0)
174 med = eclass[fmap[lo]];
175 else if (r3 == 1)
176 med = eclass[fmap[(lo + hi) >> 1]];
177 else
178 med = eclass[fmap[hi]];
179
180 unLo = ltLo = lo;
181 unHi = gtHi = hi;
182
183 while (1) {
184 while (1) {
185 if (unLo > unHi)
186 break;
187 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
188 if (n == 0) {
189 fswap(fmap[unLo], fmap[ltLo]);
190 ltLo++;
191 unLo++;
192 continue;
193 };
194 if (n > 0)
195 break;
196 unLo++;
197 }
198 while (1) {
199 if (unLo > unHi)
200 break;
201 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
202 if (n == 0) {
203 fswap(fmap[unHi], fmap[gtHi]);
204 gtHi--;
205 unHi--;
206 continue;
207 };
208 if (n < 0)
209 break;
210 unHi--;
211 }
212 if (unLo > unHi)
213 break;
214 fswap(fmap[unLo], fmap[unHi]);
215 unLo++;
216 unHi--;
217 }
218
219 AssertD(unHi == unLo - 1, "fallbackQSort3(2)");
220
221 if (gtHi < ltLo)
222 continue;
223
224 n = fmin(ltLo - lo, unLo - ltLo);
225 fvswap(lo, unLo - n, n);
226 m = fmin(hi - gtHi, gtHi - unHi);
227 fvswap(unLo, hi - m + 1, m);
228
229 n = lo + unLo - ltLo - 1;
230 m = hi - (gtHi - unHi) + 1;
231
232 if (n - lo > hi - m) {
233 fpush(lo, n);
234 fpush(m, hi);
235 }
236 else {
237 fpush(m, hi);
238 fpush(lo, n);
239 }
240 }
241 }
242
243 #undef fmin
244 #undef fpush
245 #undef fpop
246 #undef fswap
247 #undef fvswap
248 #undef FALLBACK_QSORT_SMALL_THRESH
249 #undef FALLBACK_QSORT_STACK_SIZE
250
251 /*---------------------------------------------*/
252 /* Pre:
253 nblock > 0
254 eclass exists for [0 .. nblock-1]
255 ((UChar*)eclass) [0 .. nblock-1] holds block
256 ptr exists for [0 .. nblock-1]
257
258 Post:
259 ((UChar*)eclass) [0 .. nblock-1] holds block
260 All other areas of eclass destroyed
261 fmap [0 .. nblock-1] holds sorted order
262 bhtab [ 0 .. 2+(nblock/32) ] destroyed
263 */
264
265 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz)&31))
266 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz)&31))
267 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz)&31)))
268 #define WORD_BH(zz) bhtab[(zz) >> 5]
269 #define UNALIGNED_BH(zz) ((zz)&0x01f)
270
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)271 static void fallbackSort(UInt32* fmap, UInt32* eclass, UInt32* bhtab, Int32 nblock, Int32 verb) {
272 Int32 ftab[257];
273 Int32 ftabCopy[256];
274 Int32 H, i, j, k, l, r, cc, cc1;
275 Int32 nNotDone;
276 Int32 nBhtab;
277 UChar* eclass8 = (UChar*)eclass;
278
279 /*--
280 Initial 1-char radix sort to generate
281 initial fmap and initial BH bits.
282 --*/
283 if (verb >= 4)
284 VPrintf0(" bucket sorting ...\n");
285 for (i = 0; i < 257; i++)
286 ftab[i] = 0;
287 for (i = 0; i < nblock; i++)
288 ftab[eclass8[i]]++;
289 for (i = 0; i < 256; i++)
290 ftabCopy[i] = ftab[i];
291 for (i = 1; i < 257; i++)
292 ftab[i] += ftab[i - 1];
293
294 for (i = 0; i < nblock; i++) {
295 j = eclass8[i];
296 k = ftab[j] - 1;
297 ftab[j] = k;
298 fmap[k] = i;
299 }
300
301 nBhtab = 2 + (nblock / 32);
302 for (i = 0; i < nBhtab; i++)
303 bhtab[i] = 0;
304 for (i = 0; i < 256; i++)
305 SET_BH(ftab[i]);
306
307 /*--
308 Inductively refine the buckets. Kind-of an
309 "exponential radix sort" (!), inspired by the
310 Manber-Myers suffix array construction algorithm.
311 --*/
312
313 /*-- set sentinel bits for block-end detection --*/
314 for (i = 0; i < 32; i++) {
315 SET_BH(nblock + 2 * i);
316 CLEAR_BH(nblock + 2 * i + 1);
317 }
318
319 /*-- the log(N) loop --*/
320 H = 1;
321 while (1) {
322 if (verb >= 4)
323 VPrintf1(" depth %6d has ", H);
324
325 j = 0;
326 for (i = 0; i < nblock; i++) {
327 if (ISSET_BH(i))
328 j = i;
329 k = fmap[i] - H;
330 if (k < 0)
331 k += nblock;
332 eclass[k] = j;
333 }
334
335 nNotDone = 0;
336 r = -1;
337 while (1) {
338 /*-- find the next non-singleton bucket --*/
339 k = r + 1;
340 while (ISSET_BH(k) && UNALIGNED_BH(k))
341 k++;
342 if (ISSET_BH(k)) {
343 while (WORD_BH(k) == 0xffffffff)
344 k += 32;
345 while (ISSET_BH(k))
346 k++;
347 }
348 l = k - 1;
349 if (l >= nblock)
350 break;
351 while (!ISSET_BH(k) && UNALIGNED_BH(k))
352 k++;
353 if (!ISSET_BH(k)) {
354 while (WORD_BH(k) == 0x00000000)
355 k += 32;
356 while (!ISSET_BH(k))
357 k++;
358 }
359 r = k - 1;
360 if (r >= nblock)
361 break;
362
363 /*-- now [l, r] bracket current bucket --*/
364 if (r > l) {
365 nNotDone += (r - l + 1);
366 fallbackQSort3(fmap, eclass, l, r);
367
368 /*-- scan bucket and generate header bits-- */
369 cc = -1;
370 for (i = l; i <= r; i++) {
371 cc1 = eclass[fmap[i]];
372 if (cc != cc1) {
373 SET_BH(i);
374 cc = cc1;
375 };
376 }
377 }
378 }
379
380 if (verb >= 4)
381 VPrintf1("%6d unresolved strings\n", nNotDone);
382
383 H *= 2;
384 if (H > nblock || nNotDone == 0)
385 break;
386 }
387
388 /*--
389 Reconstruct the original block in
390 eclass8 [0 .. nblock-1], since the
391 previous phase destroyed it.
392 --*/
393 if (verb >= 4)
394 VPrintf0(" reconstructing block ...\n");
395 j = 0;
396 for (i = 0; i < nblock; i++) {
397 while (ftabCopy[j] == 0)
398 j++;
399 ftabCopy[j]--;
400 eclass8[fmap[i]] = (UChar)j;
401 }
402 AssertH(j < 256, 1005);
403 }
404
405 #undef SET_BH
406 #undef CLEAR_BH
407 #undef ISSET_BH
408 #undef WORD_BH
409 #undef UNALIGNED_BH
410
411 /*---------------------------------------------*/
412 /*--- The main, O(N^2 log(N)) sorting ---*/
413 /*--- algorithm. Faster for "normal" ---*/
414 /*--- non-repetitive blocks. ---*/
415 /*---------------------------------------------*/
416
417 /*---------------------------------------------*/
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)418 static __inline__ Bool mainGtU(UInt32 i1,
419 UInt32 i2,
420 UChar* block,
421 UInt16* quadrant,
422 UInt32 nblock,
423 Int32* budget) {
424 Int32 k;
425 UChar c1, c2;
426 UInt16 s1, s2;
427
428 AssertD(i1 != i2, "mainGtU");
429 /* 1 */
430 c1 = block[i1];
431 c2 = block[i2];
432 if (c1 != c2)
433 return (c1 > c2);
434 i1++;
435 i2++;
436 /* 2 */
437 c1 = block[i1];
438 c2 = block[i2];
439 if (c1 != c2)
440 return (c1 > c2);
441 i1++;
442 i2++;
443 /* 3 */
444 c1 = block[i1];
445 c2 = block[i2];
446 if (c1 != c2)
447 return (c1 > c2);
448 i1++;
449 i2++;
450 /* 4 */
451 c1 = block[i1];
452 c2 = block[i2];
453 if (c1 != c2)
454 return (c1 > c2);
455 i1++;
456 i2++;
457 /* 5 */
458 c1 = block[i1];
459 c2 = block[i2];
460 if (c1 != c2)
461 return (c1 > c2);
462 i1++;
463 i2++;
464 /* 6 */
465 c1 = block[i1];
466 c2 = block[i2];
467 if (c1 != c2)
468 return (c1 > c2);
469 i1++;
470 i2++;
471 /* 7 */
472 c1 = block[i1];
473 c2 = block[i2];
474 if (c1 != c2)
475 return (c1 > c2);
476 i1++;
477 i2++;
478 /* 8 */
479 c1 = block[i1];
480 c2 = block[i2];
481 if (c1 != c2)
482 return (c1 > c2);
483 i1++;
484 i2++;
485 /* 9 */
486 c1 = block[i1];
487 c2 = block[i2];
488 if (c1 != c2)
489 return (c1 > c2);
490 i1++;
491 i2++;
492 /* 10 */
493 c1 = block[i1];
494 c2 = block[i2];
495 if (c1 != c2)
496 return (c1 > c2);
497 i1++;
498 i2++;
499 /* 11 */
500 c1 = block[i1];
501 c2 = block[i2];
502 if (c1 != c2)
503 return (c1 > c2);
504 i1++;
505 i2++;
506 /* 12 */
507 c1 = block[i1];
508 c2 = block[i2];
509 if (c1 != c2)
510 return (c1 > c2);
511 i1++;
512 i2++;
513
514 k = nblock + 8;
515
516 do {
517 /* 1 */
518 c1 = block[i1];
519 c2 = block[i2];
520 if (c1 != c2)
521 return (c1 > c2);
522 s1 = quadrant[i1];
523 s2 = quadrant[i2];
524 if (s1 != s2)
525 return (s1 > s2);
526 i1++;
527 i2++;
528 /* 2 */
529 c1 = block[i1];
530 c2 = block[i2];
531 if (c1 != c2)
532 return (c1 > c2);
533 s1 = quadrant[i1];
534 s2 = quadrant[i2];
535 if (s1 != s2)
536 return (s1 > s2);
537 i1++;
538 i2++;
539 /* 3 */
540 c1 = block[i1];
541 c2 = block[i2];
542 if (c1 != c2)
543 return (c1 > c2);
544 s1 = quadrant[i1];
545 s2 = quadrant[i2];
546 if (s1 != s2)
547 return (s1 > s2);
548 i1++;
549 i2++;
550 /* 4 */
551 c1 = block[i1];
552 c2 = block[i2];
553 if (c1 != c2)
554 return (c1 > c2);
555 s1 = quadrant[i1];
556 s2 = quadrant[i2];
557 if (s1 != s2)
558 return (s1 > s2);
559 i1++;
560 i2++;
561 /* 5 */
562 c1 = block[i1];
563 c2 = block[i2];
564 if (c1 != c2)
565 return (c1 > c2);
566 s1 = quadrant[i1];
567 s2 = quadrant[i2];
568 if (s1 != s2)
569 return (s1 > s2);
570 i1++;
571 i2++;
572 /* 6 */
573 c1 = block[i1];
574 c2 = block[i2];
575 if (c1 != c2)
576 return (c1 > c2);
577 s1 = quadrant[i1];
578 s2 = quadrant[i2];
579 if (s1 != s2)
580 return (s1 > s2);
581 i1++;
582 i2++;
583 /* 7 */
584 c1 = block[i1];
585 c2 = block[i2];
586 if (c1 != c2)
587 return (c1 > c2);
588 s1 = quadrant[i1];
589 s2 = quadrant[i2];
590 if (s1 != s2)
591 return (s1 > s2);
592 i1++;
593 i2++;
594 /* 8 */
595 c1 = block[i1];
596 c2 = block[i2];
597 if (c1 != c2)
598 return (c1 > c2);
599 s1 = quadrant[i1];
600 s2 = quadrant[i2];
601 if (s1 != s2)
602 return (s1 > s2);
603 i1++;
604 i2++;
605
606 if (i1 >= nblock)
607 i1 -= nblock;
608 if (i2 >= nblock)
609 i2 -= nblock;
610
611 k -= 8;
612 (*budget)--;
613 } while (k >= 0);
614
615 return False;
616 }
617
618 /*---------------------------------------------*/
619 /*--
620 Knuth's increments seem to work better
621 than Incerpi-Sedgewick here. Possibly
622 because the number of elements to sort
623 is usually small, typically <= 20.
624 --*/
625 static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093,
626 3280, 9841, 29524, 88573, 265720, 797161, 2391484 };
627
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)628 static void mainSimpleSort(UInt32* ptr,
629 UChar* block,
630 UInt16* quadrant,
631 Int32 nblock,
632 Int32 lo,
633 Int32 hi,
634 Int32 d,
635 Int32* budget) {
636 Int32 i, j, h, bigN, hp;
637 UInt32 v;
638
639 bigN = hi - lo + 1;
640 if (bigN < 2)
641 return;
642
643 hp = 0;
644 while (incs[hp] < bigN)
645 hp++;
646 hp--;
647
648 for (; hp >= 0; hp--) {
649 h = incs[hp];
650
651 i = lo + h;
652 while (True) {
653 /*-- copy 1 --*/
654 if (i > hi)
655 break;
656 v = ptr[i];
657 j = i;
658 while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
659 ptr[j] = ptr[j - h];
660 j = j - h;
661 if (j <= (lo + h - 1))
662 break;
663 }
664 ptr[j] = v;
665 i++;
666
667 /*-- copy 2 --*/
668 if (i > hi)
669 break;
670 v = ptr[i];
671 j = i;
672 while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
673 ptr[j] = ptr[j - h];
674 j = j - h;
675 if (j <= (lo + h - 1))
676 break;
677 }
678 ptr[j] = v;
679 i++;
680
681 /*-- copy 3 --*/
682 if (i > hi)
683 break;
684 v = ptr[i];
685 j = i;
686 while (mainGtU(ptr[j - h] + d, v + d, block, quadrant, nblock, budget)) {
687 ptr[j] = ptr[j - h];
688 j = j - h;
689 if (j <= (lo + h - 1))
690 break;
691 }
692 ptr[j] = v;
693 i++;
694
695 if (*budget < 0)
696 return;
697 }
698 }
699 }
700
701 /*---------------------------------------------*/
702 /*--
703 The following is an implementation of
704 an elegant 3-way quicksort for strings,
705 described in a paper "Fast Algorithms for
706 Sorting and Searching Strings", by Robert
707 Sedgewick and Jon L. Bentley.
708 --*/
709
710 #define mswap(zz1, zz2) \
711 { \
712 Int32 zztmp = zz1; \
713 zz1 = zz2; \
714 zz2 = zztmp; \
715 }
716
717 #define mvswap(zzp1, zzp2, zzn) \
718 { \
719 Int32 yyp1 = (zzp1); \
720 Int32 yyp2 = (zzp2); \
721 Int32 yyn = (zzn); \
722 while (yyn > 0) { \
723 mswap(ptr[yyp1], ptr[yyp2]); \
724 yyp1++; \
725 yyp2++; \
726 yyn--; \
727 } \
728 }
729
mmed3(UChar a,UChar b,UChar c)730 static __inline__ UChar mmed3(UChar a, UChar b, UChar c) {
731 UChar t;
732 if (a > b) {
733 t = a;
734 a = b;
735 b = t;
736 };
737 if (b > c) {
738 b = c;
739 if (a > b)
740 b = a;
741 }
742 return b;
743 }
744
745 #define mmin(a, b) ((a) < (b)) ? (a) : (b)
746
747 #define mpush(lz, hz, dz) \
748 { \
749 stackLo[sp] = lz; \
750 stackHi[sp] = hz; \
751 stackD[sp] = dz; \
752 sp++; \
753 }
754
755 #define mpop(lz, hz, dz) \
756 { \
757 sp--; \
758 lz = stackLo[sp]; \
759 hz = stackHi[sp]; \
760 dz = stackD[sp]; \
761 }
762
763 #define mnextsize(az) (nextHi[az] - nextLo[az])
764
765 #define mnextswap(az, bz) \
766 { \
767 Int32 tz; \
768 tz = nextLo[az]; \
769 nextLo[az] = nextLo[bz]; \
770 nextLo[bz] = tz; \
771 tz = nextHi[az]; \
772 nextHi[az] = nextHi[bz]; \
773 nextHi[bz] = tz; \
774 tz = nextD[az]; \
775 nextD[az] = nextD[bz]; \
776 nextD[bz] = tz; \
777 }
778
779 #define MAIN_QSORT_SMALL_THRESH 20
780 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
781 #define MAIN_QSORT_STACK_SIZE 100
782
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)783 static void mainQSort3(UInt32* ptr,
784 UChar* block,
785 UInt16* quadrant,
786 Int32 nblock,
787 Int32 loSt,
788 Int32 hiSt,
789 Int32 dSt,
790 Int32* budget) {
791 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
792 Int32 sp, lo, hi, d;
793
794 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
795 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
796 Int32 stackD[MAIN_QSORT_STACK_SIZE];
797
798 Int32 nextLo[3];
799 Int32 nextHi[3];
800 Int32 nextD[3];
801
802 sp = 0;
803 mpush(loSt, hiSt, dSt);
804
805 while (sp > 0) {
806 AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001);
807
808 mpop(lo, hi, d);
809 if (hi - lo < MAIN_QSORT_SMALL_THRESH || d > MAIN_QSORT_DEPTH_THRESH) {
810 mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
811 if (*budget < 0)
812 return;
813 continue;
814 }
815
816 med = (Int32)mmed3(block[ptr[lo] + d], block[ptr[hi] + d], block[ptr[(lo + hi) >> 1] + d]);
817
818 unLo = ltLo = lo;
819 unHi = gtHi = hi;
820
821 while (True) {
822 while (True) {
823 if (unLo > unHi)
824 break;
825 n = ((Int32)block[ptr[unLo] + d]) - med;
826 if (n == 0) {
827 mswap(ptr[unLo], ptr[ltLo]);
828 ltLo++;
829 unLo++;
830 continue;
831 };
832 if (n > 0)
833 break;
834 unLo++;
835 }
836 while (True) {
837 if (unLo > unHi)
838 break;
839 n = ((Int32)block[ptr[unHi] + d]) - med;
840 if (n == 0) {
841 mswap(ptr[unHi], ptr[gtHi]);
842 gtHi--;
843 unHi--;
844 continue;
845 };
846 if (n < 0)
847 break;
848 unHi--;
849 }
850 if (unLo > unHi)
851 break;
852 mswap(ptr[unLo], ptr[unHi]);
853 unLo++;
854 unHi--;
855 }
856
857 AssertD(unHi == unLo - 1, "mainQSort3(2)");
858
859 if (gtHi < ltLo) {
860 mpush(lo, hi, d + 1);
861 continue;
862 }
863
864 n = mmin(ltLo - lo, unLo - ltLo);
865 mvswap(lo, unLo - n, n);
866 m = mmin(hi - gtHi, gtHi - unHi);
867 mvswap(unLo, hi - m + 1, m);
868
869 n = lo + unLo - ltLo - 1;
870 m = hi - (gtHi - unHi) + 1;
871
872 nextLo[0] = lo;
873 nextHi[0] = n;
874 nextD[0] = d;
875 nextLo[1] = m;
876 nextHi[1] = hi;
877 nextD[1] = d;
878 nextLo[2] = n + 1;
879 nextHi[2] = m - 1;
880 nextD[2] = d + 1;
881
882 if (mnextsize(0) < mnextsize(1))
883 mnextswap(0, 1);
884 if (mnextsize(1) < mnextsize(2))
885 mnextswap(1, 2);
886 if (mnextsize(0) < mnextsize(1))
887 mnextswap(0, 1);
888
889 AssertD(mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
890 AssertD(mnextsize(1) >= mnextsize(2), "mainQSort3(9)");
891
892 mpush(nextLo[0], nextHi[0], nextD[0]);
893 mpush(nextLo[1], nextHi[1], nextD[1]);
894 mpush(nextLo[2], nextHi[2], nextD[2]);
895 }
896 }
897
898 #undef mswap
899 #undef mvswap
900 #undef mpush
901 #undef mpop
902 #undef mmin
903 #undef mnextsize
904 #undef mnextswap
905 #undef MAIN_QSORT_SMALL_THRESH
906 #undef MAIN_QSORT_DEPTH_THRESH
907 #undef MAIN_QSORT_STACK_SIZE
908
909 /*---------------------------------------------*/
910 /* Pre:
911 nblock > N_OVERSHOOT
912 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
913 ((UChar*)block32) [0 .. nblock-1] holds block
914 ptr exists for [0 .. nblock-1]
915
916 Post:
917 ((UChar*)block32) [0 .. nblock-1] holds block
918 All other areas of block32 destroyed
919 ftab [0 .. 65536 ] destroyed
920 ptr [0 .. nblock-1] holds sorted order
921 if (*budget < 0), sorting was abandoned
922 */
923
924 #define BIGFREQ(b) (ftab[((b) + 1) << 8] - ftab[(b) << 8])
925 #define SETMASK (1 << 21)
926 #define CLEARMASK (~(SETMASK))
927
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)928 static void mainSort(UInt32* ptr,
929 UChar* block,
930 UInt16* quadrant,
931 UInt32* ftab,
932 Int32 nblock,
933 Int32 verb,
934 Int32* budget) {
935 Int32 i, j, k, ss, sb;
936 Int32 runningOrder[256];
937 Bool bigDone[256];
938 Int32 copyStart[256];
939 Int32 copyEnd[256];
940 UChar c1;
941 Int32 numQSorted;
942 UInt16 s;
943 if (verb >= 4)
944 VPrintf0(" main sort initialise ...\n");
945
946 /*-- set up the 2-byte frequency table --*/
947 for (i = 65536; i >= 0; i--)
948 ftab[i] = 0;
949
950 j = block[0] << 8;
951 i = nblock - 1;
952 for (; i >= 3; i -= 4) {
953 quadrant[i] = 0;
954 j = (j >> 8) | (((UInt16)block[i]) << 8);
955 ftab[j]++;
956 quadrant[i - 1] = 0;
957 j = (j >> 8) | (((UInt16)block[i - 1]) << 8);
958 ftab[j]++;
959 quadrant[i - 2] = 0;
960 j = (j >> 8) | (((UInt16)block[i - 2]) << 8);
961 ftab[j]++;
962 quadrant[i - 3] = 0;
963 j = (j >> 8) | (((UInt16)block[i - 3]) << 8);
964 ftab[j]++;
965 }
966 for (; i >= 0; i--) {
967 quadrant[i] = 0;
968 j = (j >> 8) | (((UInt16)block[i]) << 8);
969 ftab[j]++;
970 }
971
972 /*-- (emphasises close relationship of block & quadrant) --*/
973 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
974 block[nblock + i] = block[i];
975 quadrant[nblock + i] = 0;
976 }
977
978 if (verb >= 4)
979 VPrintf0(" bucket sorting ...\n");
980
981 /*-- Complete the initial radix sort --*/
982 for (i = 1; i <= 65536; i++)
983 ftab[i] += ftab[i - 1];
984
985 s = block[0] << 8;
986 i = nblock - 1;
987 for (; i >= 3; i -= 4) {
988 s = (s >> 8) | (block[i] << 8);
989 j = ftab[s] - 1;
990 ftab[s] = j;
991 ptr[j] = i;
992 s = (s >> 8) | (block[i - 1] << 8);
993 j = ftab[s] - 1;
994 ftab[s] = j;
995 ptr[j] = i - 1;
996 s = (s >> 8) | (block[i - 2] << 8);
997 j = ftab[s] - 1;
998 ftab[s] = j;
999 ptr[j] = i - 2;
1000 s = (s >> 8) | (block[i - 3] << 8);
1001 j = ftab[s] - 1;
1002 ftab[s] = j;
1003 ptr[j] = i - 3;
1004 }
1005 for (; i >= 0; i--) {
1006 s = (s >> 8) | (block[i] << 8);
1007 j = ftab[s] - 1;
1008 ftab[s] = j;
1009 ptr[j] = i;
1010 }
1011
1012 /*--
1013 Now ftab contains the first loc of every small bucket.
1014 Calculate the running order, from smallest to largest
1015 big bucket.
1016 --*/
1017 for (i = 0; i <= 255; i++) {
1018 bigDone[i] = False;
1019 runningOrder[i] = i;
1020 }
1021
1022 {
1023 Int32 vv;
1024 Int32 h = 1;
1025 do
1026 h = 3 * h + 1;
1027 while (h <= 256);
1028 do {
1029 h = h / 3;
1030 for (i = h; i <= 255; i++) {
1031 vv = runningOrder[i];
1032 j = i;
1033 while (BIGFREQ(runningOrder[j - h]) > BIGFREQ(vv)) {
1034 runningOrder[j] = runningOrder[j - h];
1035 j = j - h;
1036 if (j <= (h - 1))
1037 goto zero;
1038 }
1039 zero:
1040 runningOrder[j] = vv;
1041 }
1042 } while (h != 1);
1043 }
1044
1045 /*--
1046 The main sorting loop.
1047 --*/
1048
1049 numQSorted = 0;
1050
1051 for (i = 0; i <= 255; i++) {
1052 /*--
1053 Process big buckets, starting with the least full.
1054 Basically this is a 3-step process in which we call
1055 mainQSort3 to sort the small buckets [ss, j], but
1056 also make a big effort to avoid the calls if we can.
1057 --*/
1058 ss = runningOrder[i];
1059
1060 /*--
1061 Step 1:
1062 Complete the big bucket [ss] by quicksorting
1063 any unsorted small buckets [ss, j], for j != ss.
1064 Hopefully previous pointer-scanning phases have already
1065 completed many of the small buckets [ss, j], so
1066 we don't have to sort them at all.
1067 --*/
1068 for (j = 0; j <= 255; j++) {
1069 if (j != ss) {
1070 sb = (ss << 8) + j;
1071 if (!(ftab[sb] & SETMASK)) {
1072 Int32 lo = ftab[sb] & CLEARMASK;
1073 Int32 hi = (ftab[sb + 1] & CLEARMASK) - 1;
1074 if (hi > lo) {
1075 if (verb >= 4)
1076 VPrintf4(" qsort [0x%x, 0x%x] "
1077 "done %d this %d\n",
1078 ss,
1079 j,
1080 numQSorted,
1081 hi - lo + 1);
1082 mainQSort3(ptr, block, quadrant, nblock, lo, hi, BZ_N_RADIX, budget);
1083 numQSorted += (hi - lo + 1);
1084 if (*budget < 0)
1085 return;
1086 }
1087 }
1088 ftab[sb] |= SETMASK;
1089 }
1090 }
1091
1092 AssertH(!bigDone[ss], 1006);
1093
1094 /*--
1095 Step 2:
1096 Now scan this big bucket [ss] so as to synthesise the
1097 sorted order for small buckets [t, ss] for all t,
1098 including, magically, the bucket [ss,ss] too.
1099 This will avoid doing Real Work in subsequent Step 1's.
1100 --*/
1101 {
1102 for (j = 0; j <= 255; j++) {
1103 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
1104 copyEnd[j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
1105 }
1106 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
1107 k = ptr[j] - 1;
1108 if (k < 0)
1109 k += nblock;
1110 c1 = block[k];
1111 if (!bigDone[c1])
1112 ptr[copyStart[c1]++] = k;
1113 }
1114 for (j = (ftab[(ss + 1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
1115 k = ptr[j] - 1;
1116 if (k < 0)
1117 k += nblock;
1118 c1 = block[k];
1119 if (!bigDone[c1])
1120 ptr[copyEnd[c1]--] = k;
1121 }
1122 }
1123
1124 AssertH((copyStart[ss] - 1 == copyEnd[ss]) ||
1125 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
1126 Necessity for this case is demonstrated by compressing
1127 a sequence of approximately 48.5 million of character
1128 251; 1.0.0/1.0.1 will then die here. */
1129 (copyStart[ss] == 0 && copyEnd[ss] == nblock - 1),
1130 1007)
1131
1132 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
1133
1134 /*--
1135 Step 3:
1136 The [ss] big bucket is now done. Record this fact,
1137 and update the quadrant descriptors. Remember to
1138 update quadrants in the overshoot area too, if
1139 necessary. The "if (i < 255)" test merely skips
1140 this updating for the last bucket processed, since
1141 updating for the last bucket is pointless.
1142
1143 The quadrant array provides a way to incrementally
1144 cache sort orderings, as they appear, so as to
1145 make subsequent comparisons in fullGtU() complete
1146 faster. For repetitive blocks this makes a big
1147 difference (but not big enough to be able to avoid
1148 the fallback sorting mechanism, exponential radix sort).
1149
1150 The precise meaning is: at all times:
1151
1152 for 0 <= i < nblock and 0 <= j <= nblock
1153
1154 if block[i] != block[j],
1155
1156 then the relative values of quadrant[i] and
1157 quadrant[j] are meaningless.
1158
1159 else {
1160 if quadrant[i] < quadrant[j]
1161 then the string starting at i lexicographically
1162 precedes the string starting at j
1163
1164 else if quadrant[i] > quadrant[j]
1165 then the string starting at j lexicographically
1166 precedes the string starting at i
1167
1168 else
1169 the relative ordering of the strings starting
1170 at i and j has not yet been determined.
1171 }
1172 --*/
1173 bigDone[ss] = True;
1174
1175 if (i < 255) {
1176 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
1177 Int32 bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
1178 Int32 shifts = 0;
1179
1180 while ((bbSize >> shifts) > 65534)
1181 shifts++;
1182
1183 for (j = bbSize - 1; j >= 0; j--) {
1184 Int32 a2update = ptr[bbStart + j];
1185 UInt16 qVal = (UInt16)(j >> shifts);
1186 quadrant[a2update] = qVal;
1187 if (a2update < BZ_N_OVERSHOOT)
1188 quadrant[a2update + nblock] = qVal;
1189 }
1190 AssertH(((bbSize - 1) >> shifts) <= 65535, 1002);
1191 }
1192 }
1193
1194 if (verb >= 4)
1195 VPrintf3(" %d pointers, %d sorted, %d scanned\n",
1196 nblock,
1197 numQSorted,
1198 nblock - numQSorted);
1199 }
1200
1201 #undef BIGFREQ
1202 #undef SETMASK
1203 #undef CLEARMASK
1204
1205 /*---------------------------------------------*/
1206 /* Pre:
1207 nblock > 0
1208 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1209 ((UChar*)arr2) [0 .. nblock-1] holds block
1210 arr1 exists for [0 .. nblock-1]
1211
1212 Post:
1213 ((UChar*)arr2) [0 .. nblock-1] holds block
1214 All other areas of block destroyed
1215 ftab [ 0 .. 65536 ] destroyed
1216 arr1 [0 .. nblock-1] holds sorted order
1217 */
BZ2_blockSort(EState * s)1218 void BZ2_blockSort(EState* s) {
1219 UInt32* ptr = s->ptr;
1220 UChar* block = s->block;
1221 UInt32* ftab = s->ftab;
1222 Int32 nblock = s->nblock;
1223 Int32 verb = s->verbosity;
1224 Int32 wfact = s->workFactor;
1225 UInt16* quadrant;
1226 Int32 budget;
1227 Int32 budgetInit;
1228 Int32 i;
1229
1230 if (nblock < 10000) {
1231 fallbackSort(s->arr1, s->arr2, ftab, nblock, verb);
1232 }
1233 else {
1234 /* Calculate the location for quadrant, remembering to get
1235 the alignment right. Assumes that &(block[0]) is at least
1236 2-byte aligned -- this should be ok since block is really
1237 the first section of arr2.
1238 */
1239 i = nblock + BZ_N_OVERSHOOT;
1240 if (i & 1)
1241 i++;
1242 quadrant = (UInt16*)(&(block[i]));
1243
1244 /* (wfact-1) / 3 puts the default-factor-30
1245 transition point at very roughly the same place as
1246 with v0.1 and v0.9.0.
1247 Not that it particularly matters any more, since the
1248 resulting compressed stream is now the same regardless
1249 of whether or not we use the main sort or fallback sort.
1250 */
1251 if (wfact < 1)
1252 wfact = 1;
1253 if (wfact > 100)
1254 wfact = 100;
1255 budgetInit = nblock * ((wfact - 1) / 3);
1256 budget = budgetInit;
1257
1258 mainSort(ptr, block, quadrant, ftab, nblock, verb, &budget);
1259 if (verb >= 3)
1260 VPrintf3(" %d work, %d block, ratio %5.2f\n",
1261 budgetInit - budget,
1262 nblock,
1263 (float)(budgetInit - budget) / (float)(nblock == 0 ? 1 : nblock));
1264 if (budget < 0) {
1265 if (verb >= 2)
1266 VPrintf0(" too repetitive; using fallback"
1267 " sorting algorithm\n");
1268 fallbackSort(s->arr1, s->arr2, ftab, nblock, verb);
1269 }
1270 }
1271
1272 s->origPtr = -1;
1273 for (i = 0; i < s->nblock; i++)
1274 if (ptr[i] == 0) {
1275 s->origPtr = i;
1276 break;
1277 };
1278
1279 AssertH(s->origPtr != -1, 1003);
1280 }
1281
1282 /*-------------------------------------------------------------*/
1283 /*--- end blocksort.c ---*/
1284 /*-------------------------------------------------------------*/
1285