xref: /oneTBB/examples/graph/fgbzip2/huffman.cpp (revision b15aabb3)
1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev 
4d86ed7fbStbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev     you may not use this file except in compliance with the License.
6d86ed7fbStbbdev     You may obtain a copy of the License at
7d86ed7fbStbbdev 
8d86ed7fbStbbdev         http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev 
10d86ed7fbStbbdev     Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev     See the License for the specific language governing permissions and
14d86ed7fbStbbdev     limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev 
17d86ed7fbStbbdev /*-------------------------------------------------------------*/
18d86ed7fbStbbdev /*--- Huffman coding low-level stuff                        ---*/
19d86ed7fbStbbdev /*---                                           huffman.cpp ---*/
20d86ed7fbStbbdev /*-------------------------------------------------------------*/
21d86ed7fbStbbdev 
22d86ed7fbStbbdev /* ------------------------------------------------------------------
23d86ed7fbStbbdev    The original source for this example:
24d86ed7fbStbbdev    This file is part of bzip2/libbzip2, a program and library for
25d86ed7fbStbbdev    lossless, block-sorting data compression.
26d86ed7fbStbbdev 
27d86ed7fbStbbdev    bzip2/libbzip2 version 1.0.6 of 6 September 2010
28d86ed7fbStbbdev    Copyright (C) 1996-2010 Julian Seward <[email protected]>
29d86ed7fbStbbdev 
30d86ed7fbStbbdev    This program, "bzip2", the associated library "libbzip2", and all
31d86ed7fbStbbdev    documentation, are copyright (C) 1996-2010 Julian R Seward.  All
32d86ed7fbStbbdev    rights reserved.
33d86ed7fbStbbdev 
34d86ed7fbStbbdev    Redistribution and use in source and binary forms, with or without
35d86ed7fbStbbdev    modification, are permitted provided that the following conditions
36d86ed7fbStbbdev    are met:
37d86ed7fbStbbdev 
38d86ed7fbStbbdev    1. Redistributions of source code must retain the above copyright
39d86ed7fbStbbdev    notice, this list of conditions and the following disclaimer.
40d86ed7fbStbbdev 
41d86ed7fbStbbdev    2. The origin of this software must not be misrepresented; you must
42d86ed7fbStbbdev    not claim that you wrote the original software.  If you use this
43d86ed7fbStbbdev    software in a product, an acknowledgment in the product
44d86ed7fbStbbdev    documentation would be appreciated but is not required.
45d86ed7fbStbbdev 
46d86ed7fbStbbdev    3. Altered source versions must be plainly marked as such, and must
47d86ed7fbStbbdev    not be misrepresented as being the original software.
48d86ed7fbStbbdev 
49d86ed7fbStbbdev    4. The name of the author may not be used to endorse or promote
50d86ed7fbStbbdev    products derived from this software without specific prior written
51d86ed7fbStbbdev    permission.
52d86ed7fbStbbdev 
53d86ed7fbStbbdev    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
54d86ed7fbStbbdev    OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55d86ed7fbStbbdev    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56d86ed7fbStbbdev    ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
57d86ed7fbStbbdev    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58d86ed7fbStbbdev    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
59d86ed7fbStbbdev    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60d86ed7fbStbbdev    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61d86ed7fbStbbdev    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62d86ed7fbStbbdev    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63d86ed7fbStbbdev    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64d86ed7fbStbbdev 
65d86ed7fbStbbdev    Julian Seward, [email protected]
66d86ed7fbStbbdev    bzip2/libbzip2 version 1.0.6 of 6 September 2010
67d86ed7fbStbbdev    ------------------------------------------------------------------ */
68d86ed7fbStbbdev 
69d86ed7fbStbbdev #include "bzlib_private.hpp"
70d86ed7fbStbbdev 
71d86ed7fbStbbdev /*---------------------------------------------------*/
72d86ed7fbStbbdev #define WEIGHTOF(zz0)   ((zz0)&0xffffff00)
73d86ed7fbStbbdev #define DEPTHOF(zz1)    ((zz1)&0x000000ff)
74d86ed7fbStbbdev #define MYMAX(zz2, zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
75d86ed7fbStbbdev 
76d86ed7fbStbbdev #define ADDWEIGHTS(zw1, zw2) \
77d86ed7fbStbbdev     (WEIGHTOF(zw1) + WEIGHTOF(zw2)) | (1 + MYMAX(DEPTHOF(zw1), DEPTHOF(zw2)))
78d86ed7fbStbbdev 
79d86ed7fbStbbdev #define UPHEAP(z)                                     \
80d86ed7fbStbbdev     {                                                 \
81d86ed7fbStbbdev         Int32 zz, tmp;                                \
82d86ed7fbStbbdev         zz = z;                                       \
83d86ed7fbStbbdev         tmp = heap[zz];                               \
84d86ed7fbStbbdev         while (weight[tmp] < weight[heap[zz >> 1]]) { \
85d86ed7fbStbbdev             heap[zz] = heap[zz >> 1];                 \
86d86ed7fbStbbdev             zz >>= 1;                                 \
87d86ed7fbStbbdev         }                                             \
88d86ed7fbStbbdev         heap[zz] = tmp;                               \
89d86ed7fbStbbdev     }
90d86ed7fbStbbdev 
91d86ed7fbStbbdev #define DOWNHEAP(z)                                                    \
92d86ed7fbStbbdev     {                                                                  \
93d86ed7fbStbbdev         Int32 zz, yy, tmp;                                             \
94d86ed7fbStbbdev         zz = z;                                                        \
95d86ed7fbStbbdev         tmp = heap[zz];                                                \
96d86ed7fbStbbdev         while (True) {                                                 \
97d86ed7fbStbbdev             yy = zz << 1;                                              \
98d86ed7fbStbbdev             if (yy > nHeap)                                            \
99d86ed7fbStbbdev                 break;                                                 \
100d86ed7fbStbbdev             if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) \
101d86ed7fbStbbdev                 yy++;                                                  \
102d86ed7fbStbbdev             if (weight[tmp] < weight[heap[yy]])                        \
103d86ed7fbStbbdev                 break;                                                 \
104d86ed7fbStbbdev             heap[zz] = heap[yy];                                       \
105d86ed7fbStbbdev             zz = yy;                                                   \
106d86ed7fbStbbdev         }                                                              \
107d86ed7fbStbbdev         heap[zz] = tmp;                                                \
108d86ed7fbStbbdev     }
109d86ed7fbStbbdev 
110d86ed7fbStbbdev /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)111d86ed7fbStbbdev void BZ2_hbMakeCodeLengths(UChar *len, Int32 *freq, Int32 alphaSize, Int32 maxLen) {
112d86ed7fbStbbdev     /*--
113d86ed7fbStbbdev       Nodes and heap entries run from 1.  Entry 0
114d86ed7fbStbbdev       for both the heap and nodes is a sentinel.
115d86ed7fbStbbdev    --*/
116d86ed7fbStbbdev     Int32 nNodes, nHeap, n1, n2, i, j, k;
117d86ed7fbStbbdev     Bool tooLong;
118d86ed7fbStbbdev 
119d86ed7fbStbbdev     Int32 heap[BZ_MAX_ALPHA_SIZE + 2];
120d86ed7fbStbbdev     Int32 weight[BZ_MAX_ALPHA_SIZE * 2];
121d86ed7fbStbbdev     Int32 parent[BZ_MAX_ALPHA_SIZE * 2];
122d86ed7fbStbbdev 
123d86ed7fbStbbdev     for (i = 0; i < alphaSize; i++)
124d86ed7fbStbbdev         weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
125d86ed7fbStbbdev 
126d86ed7fbStbbdev     while (True) {
127d86ed7fbStbbdev         nNodes = alphaSize;
128d86ed7fbStbbdev         nHeap = 0;
129d86ed7fbStbbdev 
130d86ed7fbStbbdev         heap[0] = 0;
131d86ed7fbStbbdev         weight[0] = 0;
132d86ed7fbStbbdev         parent[0] = -2;
133d86ed7fbStbbdev 
134d86ed7fbStbbdev         for (i = 1; i <= alphaSize; i++) {
135d86ed7fbStbbdev             parent[i] = -1;
136d86ed7fbStbbdev             nHeap++;
137d86ed7fbStbbdev             heap[nHeap] = i;
138d86ed7fbStbbdev             UPHEAP(nHeap);
139d86ed7fbStbbdev         }
140d86ed7fbStbbdev 
141d86ed7fbStbbdev         AssertH(nHeap < (BZ_MAX_ALPHA_SIZE + 2), 2001);
142d86ed7fbStbbdev 
143d86ed7fbStbbdev         while (nHeap > 1) {
144d86ed7fbStbbdev             n1 = heap[1];
145d86ed7fbStbbdev             heap[1] = heap[nHeap];
146d86ed7fbStbbdev             nHeap--;
147d86ed7fbStbbdev             DOWNHEAP(1);
148d86ed7fbStbbdev             n2 = heap[1];
149d86ed7fbStbbdev             heap[1] = heap[nHeap];
150d86ed7fbStbbdev             nHeap--;
151d86ed7fbStbbdev             DOWNHEAP(1);
152d86ed7fbStbbdev             nNodes++;
153d86ed7fbStbbdev             parent[n1] = parent[n2] = nNodes;
154d86ed7fbStbbdev             weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
155d86ed7fbStbbdev             parent[nNodes] = -1;
156d86ed7fbStbbdev             nHeap++;
157d86ed7fbStbbdev             heap[nHeap] = nNodes;
158d86ed7fbStbbdev             UPHEAP(nHeap);
159d86ed7fbStbbdev         }
160d86ed7fbStbbdev 
161d86ed7fbStbbdev         AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
162d86ed7fbStbbdev 
163d86ed7fbStbbdev         tooLong = False;
164d86ed7fbStbbdev         for (i = 1; i <= alphaSize; i++) {
165d86ed7fbStbbdev             j = 0;
166d86ed7fbStbbdev             k = i;
167d86ed7fbStbbdev             while (parent[k] >= 0) {
168d86ed7fbStbbdev                 k = parent[k];
169d86ed7fbStbbdev                 j++;
170d86ed7fbStbbdev             }
171d86ed7fbStbbdev             len[i - 1] = j;
172d86ed7fbStbbdev             if (j > maxLen)
173d86ed7fbStbbdev                 tooLong = True;
174d86ed7fbStbbdev         }
175d86ed7fbStbbdev 
176d86ed7fbStbbdev         if (!tooLong)
177d86ed7fbStbbdev             break;
178d86ed7fbStbbdev 
179d86ed7fbStbbdev         /* 17 Oct 04: keep-going condition for the following loop used
180d86ed7fbStbbdev          to be 'i < alphaSize', which missed the last element,
181d86ed7fbStbbdev          theoretically leading to the possibility of the compressor
182d86ed7fbStbbdev          looping.  However, this count-scaling step is only needed if
183d86ed7fbStbbdev          one of the generated Huffman code words is longer than
184d86ed7fbStbbdev          maxLen, which up to and including version 1.0.2 was 20 bits,
185d86ed7fbStbbdev          which is extremely unlikely.  In version 1.0.3 maxLen was
186d86ed7fbStbbdev          changed to 17 bits, which has minimal effect on compression
187d86ed7fbStbbdev          ratio, but does mean this scaling step is used from time to
188d86ed7fbStbbdev          time, enough to verify that it works.
189d86ed7fbStbbdev 
190d86ed7fbStbbdev          This means that bzip2-1.0.3 and later will only produce
191d86ed7fbStbbdev          Huffman codes with a maximum length of 17 bits.  However, in
192d86ed7fbStbbdev          order to preserve backwards compatibility with bitstreams
193d86ed7fbStbbdev          produced by versions pre-1.0.3, the decompressor must still
194d86ed7fbStbbdev          handle lengths of up to 20. */
195d86ed7fbStbbdev 
196d86ed7fbStbbdev         for (i = 1; i <= alphaSize; i++) {
197d86ed7fbStbbdev             j = weight[i] >> 8;
198d86ed7fbStbbdev             j = 1 + (j / 2);
199d86ed7fbStbbdev             weight[i] = j << 8;
200d86ed7fbStbbdev         }
201d86ed7fbStbbdev     }
202d86ed7fbStbbdev }
203d86ed7fbStbbdev 
204d86ed7fbStbbdev /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)205d86ed7fbStbbdev void BZ2_hbAssignCodes(Int32 *code, UChar *length, Int32 minLen, Int32 maxLen, Int32 alphaSize) {
206d86ed7fbStbbdev     Int32 j, vec, i;
207d86ed7fbStbbdev 
208d86ed7fbStbbdev     vec = 0;
209d86ed7fbStbbdev     for (j = minLen; j <= maxLen; j++) {
210d86ed7fbStbbdev         for (i = 0; i < alphaSize; i++)
211d86ed7fbStbbdev             if (length[i] == j) {
212d86ed7fbStbbdev                 code[i] = vec;
213d86ed7fbStbbdev                 vec++;
214d86ed7fbStbbdev             };
215d86ed7fbStbbdev         vec <<= 1;
216d86ed7fbStbbdev     }
217d86ed7fbStbbdev }
218d86ed7fbStbbdev 
219d86ed7fbStbbdev /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)220d86ed7fbStbbdev void BZ2_hbCreateDecodeTables(Int32 *limit,
221d86ed7fbStbbdev                               Int32 *base,
222d86ed7fbStbbdev                               Int32 *perm,
223d86ed7fbStbbdev                               UChar *length,
224d86ed7fbStbbdev                               Int32 minLen,
225d86ed7fbStbbdev                               Int32 maxLen,
226d86ed7fbStbbdev                               Int32 alphaSize) {
227d86ed7fbStbbdev     Int32 pp, i, j, vec;
228d86ed7fbStbbdev 
229d86ed7fbStbbdev     pp = 0;
230d86ed7fbStbbdev     for (i = minLen; i <= maxLen; i++)
231d86ed7fbStbbdev         for (j = 0; j < alphaSize; j++)
232d86ed7fbStbbdev             if (length[j] == i) {
233d86ed7fbStbbdev                 perm[pp] = j;
234d86ed7fbStbbdev                 pp++;
235d86ed7fbStbbdev             };
236d86ed7fbStbbdev 
237d86ed7fbStbbdev     for (i = 0; i < BZ_MAX_CODE_LEN; i++)
238d86ed7fbStbbdev         base[i] = 0;
239d86ed7fbStbbdev     for (i = 0; i < alphaSize; i++)
240d86ed7fbStbbdev         base[length[i] + 1]++;
241d86ed7fbStbbdev 
242d86ed7fbStbbdev     for (i = 1; i < BZ_MAX_CODE_LEN; i++)
243d86ed7fbStbbdev         base[i] += base[i - 1];
244d86ed7fbStbbdev 
245d86ed7fbStbbdev     for (i = 0; i < BZ_MAX_CODE_LEN; i++)
246d86ed7fbStbbdev         limit[i] = 0;
247d86ed7fbStbbdev     vec = 0;
248d86ed7fbStbbdev 
249d86ed7fbStbbdev     for (i = minLen; i <= maxLen; i++) {
250d86ed7fbStbbdev         vec += (base[i + 1] - base[i]);
251d86ed7fbStbbdev         limit[i] = vec - 1;
252d86ed7fbStbbdev         vec <<= 1;
253d86ed7fbStbbdev     }
254d86ed7fbStbbdev     for (i = minLen + 1; i <= maxLen; i++)
255d86ed7fbStbbdev         base[i] = ((limit[i - 1] + 1) << 1) - base[i];
256d86ed7fbStbbdev }
257d86ed7fbStbbdev 
258d86ed7fbStbbdev /*-------------------------------------------------------------*/
259d86ed7fbStbbdev /*--- end                                         huffman.c ---*/
260d86ed7fbStbbdev /*-------------------------------------------------------------*/
261