1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev /*-------------------------------------------------------------*/
18d86ed7fbStbbdev /*--- Huffman coding low-level stuff ---*/
19d86ed7fbStbbdev /*--- huffman.cpp ---*/
20d86ed7fbStbbdev /*-------------------------------------------------------------*/
21d86ed7fbStbbdev
22d86ed7fbStbbdev /* ------------------------------------------------------------------
23d86ed7fbStbbdev The original source for this example:
24d86ed7fbStbbdev This file is part of bzip2/libbzip2, a program and library for
25d86ed7fbStbbdev lossless, block-sorting data compression.
26d86ed7fbStbbdev
27d86ed7fbStbbdev bzip2/libbzip2 version 1.0.6 of 6 September 2010
28d86ed7fbStbbdev Copyright (C) 1996-2010 Julian Seward <[email protected]>
29d86ed7fbStbbdev
30d86ed7fbStbbdev This program, "bzip2", the associated library "libbzip2", and all
31d86ed7fbStbbdev documentation, are copyright (C) 1996-2010 Julian R Seward. All
32d86ed7fbStbbdev rights reserved.
33d86ed7fbStbbdev
34d86ed7fbStbbdev Redistribution and use in source and binary forms, with or without
35d86ed7fbStbbdev modification, are permitted provided that the following conditions
36d86ed7fbStbbdev are met:
37d86ed7fbStbbdev
38d86ed7fbStbbdev 1. Redistributions of source code must retain the above copyright
39d86ed7fbStbbdev notice, this list of conditions and the following disclaimer.
40d86ed7fbStbbdev
41d86ed7fbStbbdev 2. The origin of this software must not be misrepresented; you must
42d86ed7fbStbbdev not claim that you wrote the original software. If you use this
43d86ed7fbStbbdev software in a product, an acknowledgment in the product
44d86ed7fbStbbdev documentation would be appreciated but is not required.
45d86ed7fbStbbdev
46d86ed7fbStbbdev 3. Altered source versions must be plainly marked as such, and must
47d86ed7fbStbbdev not be misrepresented as being the original software.
48d86ed7fbStbbdev
49d86ed7fbStbbdev 4. The name of the author may not be used to endorse or promote
50d86ed7fbStbbdev products derived from this software without specific prior written
51d86ed7fbStbbdev permission.
52d86ed7fbStbbdev
53d86ed7fbStbbdev THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
54d86ed7fbStbbdev OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55d86ed7fbStbbdev WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56d86ed7fbStbbdev ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
57d86ed7fbStbbdev DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58d86ed7fbStbbdev DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
59d86ed7fbStbbdev GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60d86ed7fbStbbdev INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61d86ed7fbStbbdev WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62d86ed7fbStbbdev NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63d86ed7fbStbbdev SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64d86ed7fbStbbdev
65d86ed7fbStbbdev Julian Seward, [email protected]
66d86ed7fbStbbdev bzip2/libbzip2 version 1.0.6 of 6 September 2010
67d86ed7fbStbbdev ------------------------------------------------------------------ */
68d86ed7fbStbbdev
69d86ed7fbStbbdev #include "bzlib_private.hpp"
70d86ed7fbStbbdev
71d86ed7fbStbbdev /*---------------------------------------------------*/
72d86ed7fbStbbdev #define WEIGHTOF(zz0) ((zz0)&0xffffff00)
73d86ed7fbStbbdev #define DEPTHOF(zz1) ((zz1)&0x000000ff)
74d86ed7fbStbbdev #define MYMAX(zz2, zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
75d86ed7fbStbbdev
76d86ed7fbStbbdev #define ADDWEIGHTS(zw1, zw2) \
77d86ed7fbStbbdev (WEIGHTOF(zw1) + WEIGHTOF(zw2)) | (1 + MYMAX(DEPTHOF(zw1), DEPTHOF(zw2)))
78d86ed7fbStbbdev
79d86ed7fbStbbdev #define UPHEAP(z) \
80d86ed7fbStbbdev { \
81d86ed7fbStbbdev Int32 zz, tmp; \
82d86ed7fbStbbdev zz = z; \
83d86ed7fbStbbdev tmp = heap[zz]; \
84d86ed7fbStbbdev while (weight[tmp] < weight[heap[zz >> 1]]) { \
85d86ed7fbStbbdev heap[zz] = heap[zz >> 1]; \
86d86ed7fbStbbdev zz >>= 1; \
87d86ed7fbStbbdev } \
88d86ed7fbStbbdev heap[zz] = tmp; \
89d86ed7fbStbbdev }
90d86ed7fbStbbdev
91d86ed7fbStbbdev #define DOWNHEAP(z) \
92d86ed7fbStbbdev { \
93d86ed7fbStbbdev Int32 zz, yy, tmp; \
94d86ed7fbStbbdev zz = z; \
95d86ed7fbStbbdev tmp = heap[zz]; \
96d86ed7fbStbbdev while (True) { \
97d86ed7fbStbbdev yy = zz << 1; \
98d86ed7fbStbbdev if (yy > nHeap) \
99d86ed7fbStbbdev break; \
100d86ed7fbStbbdev if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) \
101d86ed7fbStbbdev yy++; \
102d86ed7fbStbbdev if (weight[tmp] < weight[heap[yy]]) \
103d86ed7fbStbbdev break; \
104d86ed7fbStbbdev heap[zz] = heap[yy]; \
105d86ed7fbStbbdev zz = yy; \
106d86ed7fbStbbdev } \
107d86ed7fbStbbdev heap[zz] = tmp; \
108d86ed7fbStbbdev }
109d86ed7fbStbbdev
110d86ed7fbStbbdev /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)111d86ed7fbStbbdev void BZ2_hbMakeCodeLengths(UChar *len, Int32 *freq, Int32 alphaSize, Int32 maxLen) {
112d86ed7fbStbbdev /*--
113d86ed7fbStbbdev Nodes and heap entries run from 1. Entry 0
114d86ed7fbStbbdev for both the heap and nodes is a sentinel.
115d86ed7fbStbbdev --*/
116d86ed7fbStbbdev Int32 nNodes, nHeap, n1, n2, i, j, k;
117d86ed7fbStbbdev Bool tooLong;
118d86ed7fbStbbdev
119d86ed7fbStbbdev Int32 heap[BZ_MAX_ALPHA_SIZE + 2];
120d86ed7fbStbbdev Int32 weight[BZ_MAX_ALPHA_SIZE * 2];
121d86ed7fbStbbdev Int32 parent[BZ_MAX_ALPHA_SIZE * 2];
122d86ed7fbStbbdev
123d86ed7fbStbbdev for (i = 0; i < alphaSize; i++)
124d86ed7fbStbbdev weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
125d86ed7fbStbbdev
126d86ed7fbStbbdev while (True) {
127d86ed7fbStbbdev nNodes = alphaSize;
128d86ed7fbStbbdev nHeap = 0;
129d86ed7fbStbbdev
130d86ed7fbStbbdev heap[0] = 0;
131d86ed7fbStbbdev weight[0] = 0;
132d86ed7fbStbbdev parent[0] = -2;
133d86ed7fbStbbdev
134d86ed7fbStbbdev for (i = 1; i <= alphaSize; i++) {
135d86ed7fbStbbdev parent[i] = -1;
136d86ed7fbStbbdev nHeap++;
137d86ed7fbStbbdev heap[nHeap] = i;
138d86ed7fbStbbdev UPHEAP(nHeap);
139d86ed7fbStbbdev }
140d86ed7fbStbbdev
141d86ed7fbStbbdev AssertH(nHeap < (BZ_MAX_ALPHA_SIZE + 2), 2001);
142d86ed7fbStbbdev
143d86ed7fbStbbdev while (nHeap > 1) {
144d86ed7fbStbbdev n1 = heap[1];
145d86ed7fbStbbdev heap[1] = heap[nHeap];
146d86ed7fbStbbdev nHeap--;
147d86ed7fbStbbdev DOWNHEAP(1);
148d86ed7fbStbbdev n2 = heap[1];
149d86ed7fbStbbdev heap[1] = heap[nHeap];
150d86ed7fbStbbdev nHeap--;
151d86ed7fbStbbdev DOWNHEAP(1);
152d86ed7fbStbbdev nNodes++;
153d86ed7fbStbbdev parent[n1] = parent[n2] = nNodes;
154d86ed7fbStbbdev weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
155d86ed7fbStbbdev parent[nNodes] = -1;
156d86ed7fbStbbdev nHeap++;
157d86ed7fbStbbdev heap[nHeap] = nNodes;
158d86ed7fbStbbdev UPHEAP(nHeap);
159d86ed7fbStbbdev }
160d86ed7fbStbbdev
161d86ed7fbStbbdev AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
162d86ed7fbStbbdev
163d86ed7fbStbbdev tooLong = False;
164d86ed7fbStbbdev for (i = 1; i <= alphaSize; i++) {
165d86ed7fbStbbdev j = 0;
166d86ed7fbStbbdev k = i;
167d86ed7fbStbbdev while (parent[k] >= 0) {
168d86ed7fbStbbdev k = parent[k];
169d86ed7fbStbbdev j++;
170d86ed7fbStbbdev }
171d86ed7fbStbbdev len[i - 1] = j;
172d86ed7fbStbbdev if (j > maxLen)
173d86ed7fbStbbdev tooLong = True;
174d86ed7fbStbbdev }
175d86ed7fbStbbdev
176d86ed7fbStbbdev if (!tooLong)
177d86ed7fbStbbdev break;
178d86ed7fbStbbdev
179d86ed7fbStbbdev /* 17 Oct 04: keep-going condition for the following loop used
180d86ed7fbStbbdev to be 'i < alphaSize', which missed the last element,
181d86ed7fbStbbdev theoretically leading to the possibility of the compressor
182d86ed7fbStbbdev looping. However, this count-scaling step is only needed if
183d86ed7fbStbbdev one of the generated Huffman code words is longer than
184d86ed7fbStbbdev maxLen, which up to and including version 1.0.2 was 20 bits,
185d86ed7fbStbbdev which is extremely unlikely. In version 1.0.3 maxLen was
186d86ed7fbStbbdev changed to 17 bits, which has minimal effect on compression
187d86ed7fbStbbdev ratio, but does mean this scaling step is used from time to
188d86ed7fbStbbdev time, enough to verify that it works.
189d86ed7fbStbbdev
190d86ed7fbStbbdev This means that bzip2-1.0.3 and later will only produce
191d86ed7fbStbbdev Huffman codes with a maximum length of 17 bits. However, in
192d86ed7fbStbbdev order to preserve backwards compatibility with bitstreams
193d86ed7fbStbbdev produced by versions pre-1.0.3, the decompressor must still
194d86ed7fbStbbdev handle lengths of up to 20. */
195d86ed7fbStbbdev
196d86ed7fbStbbdev for (i = 1; i <= alphaSize; i++) {
197d86ed7fbStbbdev j = weight[i] >> 8;
198d86ed7fbStbbdev j = 1 + (j / 2);
199d86ed7fbStbbdev weight[i] = j << 8;
200d86ed7fbStbbdev }
201d86ed7fbStbbdev }
202d86ed7fbStbbdev }
203d86ed7fbStbbdev
204d86ed7fbStbbdev /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)205d86ed7fbStbbdev void BZ2_hbAssignCodes(Int32 *code, UChar *length, Int32 minLen, Int32 maxLen, Int32 alphaSize) {
206d86ed7fbStbbdev Int32 j, vec, i;
207d86ed7fbStbbdev
208d86ed7fbStbbdev vec = 0;
209d86ed7fbStbbdev for (j = minLen; j <= maxLen; j++) {
210d86ed7fbStbbdev for (i = 0; i < alphaSize; i++)
211d86ed7fbStbbdev if (length[i] == j) {
212d86ed7fbStbbdev code[i] = vec;
213d86ed7fbStbbdev vec++;
214d86ed7fbStbbdev };
215d86ed7fbStbbdev vec <<= 1;
216d86ed7fbStbbdev }
217d86ed7fbStbbdev }
218d86ed7fbStbbdev
219d86ed7fbStbbdev /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)220d86ed7fbStbbdev void BZ2_hbCreateDecodeTables(Int32 *limit,
221d86ed7fbStbbdev Int32 *base,
222d86ed7fbStbbdev Int32 *perm,
223d86ed7fbStbbdev UChar *length,
224d86ed7fbStbbdev Int32 minLen,
225d86ed7fbStbbdev Int32 maxLen,
226d86ed7fbStbbdev Int32 alphaSize) {
227d86ed7fbStbbdev Int32 pp, i, j, vec;
228d86ed7fbStbbdev
229d86ed7fbStbbdev pp = 0;
230d86ed7fbStbbdev for (i = minLen; i <= maxLen; i++)
231d86ed7fbStbbdev for (j = 0; j < alphaSize; j++)
232d86ed7fbStbbdev if (length[j] == i) {
233d86ed7fbStbbdev perm[pp] = j;
234d86ed7fbStbbdev pp++;
235d86ed7fbStbbdev };
236d86ed7fbStbbdev
237d86ed7fbStbbdev for (i = 0; i < BZ_MAX_CODE_LEN; i++)
238d86ed7fbStbbdev base[i] = 0;
239d86ed7fbStbbdev for (i = 0; i < alphaSize; i++)
240d86ed7fbStbbdev base[length[i] + 1]++;
241d86ed7fbStbbdev
242d86ed7fbStbbdev for (i = 1; i < BZ_MAX_CODE_LEN; i++)
243d86ed7fbStbbdev base[i] += base[i - 1];
244d86ed7fbStbbdev
245d86ed7fbStbbdev for (i = 0; i < BZ_MAX_CODE_LEN; i++)
246d86ed7fbStbbdev limit[i] = 0;
247d86ed7fbStbbdev vec = 0;
248d86ed7fbStbbdev
249d86ed7fbStbbdev for (i = minLen; i <= maxLen; i++) {
250d86ed7fbStbbdev vec += (base[i + 1] - base[i]);
251d86ed7fbStbbdev limit[i] = vec - 1;
252d86ed7fbStbbdev vec <<= 1;
253d86ed7fbStbbdev }
254d86ed7fbStbbdev for (i = minLen + 1; i <= maxLen; i++)
255d86ed7fbStbbdev base[i] = ((limit[i - 1] + 1) << 1) - base[i];
256d86ed7fbStbbdev }
257d86ed7fbStbbdev
258d86ed7fbStbbdev /*-------------------------------------------------------------*/
259d86ed7fbStbbdev /*--- end huffman.c ---*/
260d86ed7fbStbbdev /*-------------------------------------------------------------*/
261