xref: /TaskScheduler/ThirdParty/Squish/alpha.cpp (revision 2f083884)
/* -----------------------------------------------------------------------------

	Copyright (c) 2006 Simon Brown                          [email protected]

	Permission is hereby granted, free of charge, to any person obtaining
	a copy of this software and associated documentation files (the
	"Software"), to	deal in the Software without restriction, including
	without limitation the rights to use, copy, modify, merge, publish,
	distribute, sublicense, and/or sell copies of the Software, and to
	permit persons to whom the Software is furnished to do so, subject to
	the following conditions:

	The above copyright notice and this permission notice shall be included
	in all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

   -------------------------------------------------------------------------- */
25*2f083884Ss.makeev_local 
26*2f083884Ss.makeev_local #include "alpha.h"
27*2f083884Ss.makeev_local #include <algorithm>
28*2f083884Ss.makeev_local 
29*2f083884Ss.makeev_local namespace squish {
30*2f083884Ss.makeev_local 
// Rounds a to the nearest integer and clamps the result to the range
// 0..limit.
//
//   a      value to convert
//   limit  largest value that may be returned
//
// The range checks are made on the floating-point value, before it is
// converted, because converting a NaN or a value that does not fit in an
// int is undefined behaviour. A NaN input yields 0.
static int FloatToInt( float a, int limit )
{
	// adding a half makes the truncating conversion below round to nearest
	float const rounded = a + 0.5f;

	// anything that would truncate to a negative integer clamps to zero;
	// the test is written negated so that NaN takes this branch as well
	if( !( rounded > -1.0f ) )
		return 0;

	// anything that would truncate to more than the limit clamps to the
	// limit (always the case for a negative limit); the comparison is made
	// in double so that limit + 1 is exact
	if( limit < 0 || rounded >= static_cast< double >( limit ) + 1.0 )
		return limit;

	// the value is now known to be in range, so the conversion is safe
	return static_cast< int >( rounded );
}
45*2f083884Ss.makeev_local 
CompressAlphaDxt3(u8 const * rgba,int mask,void * block)46*2f083884Ss.makeev_local void CompressAlphaDxt3( u8 const* rgba, int mask, void* block )
47*2f083884Ss.makeev_local {
48*2f083884Ss.makeev_local 	u8* bytes = reinterpret_cast< u8* >( block );
49*2f083884Ss.makeev_local 
50*2f083884Ss.makeev_local 	// quantise and pack the alpha values pairwise
51*2f083884Ss.makeev_local 	for( int i = 0; i < 8; ++i )
52*2f083884Ss.makeev_local 	{
53*2f083884Ss.makeev_local 		// quantise down to 4 bits
54*2f083884Ss.makeev_local 		float alpha1 = ( float )rgba[8*i + 3] * ( 15.0f/255.0f );
55*2f083884Ss.makeev_local 		float alpha2 = ( float )rgba[8*i + 7] * ( 15.0f/255.0f );
56*2f083884Ss.makeev_local 		int quant1 = FloatToInt( alpha1, 15 );
57*2f083884Ss.makeev_local 		int quant2 = FloatToInt( alpha2, 15 );
58*2f083884Ss.makeev_local 
59*2f083884Ss.makeev_local 		// set alpha to zero where masked
60*2f083884Ss.makeev_local 		int bit1 = 1 << ( 2*i );
61*2f083884Ss.makeev_local 		int bit2 = 1 << ( 2*i + 1 );
62*2f083884Ss.makeev_local 		if( ( mask & bit1 ) == 0 )
63*2f083884Ss.makeev_local 			quant1 = 0;
64*2f083884Ss.makeev_local 		if( ( mask & bit2 ) == 0 )
65*2f083884Ss.makeev_local 			quant2 = 0;
66*2f083884Ss.makeev_local 
67*2f083884Ss.makeev_local 		// pack into the byte
68*2f083884Ss.makeev_local 		bytes[i] = ( u8 )( quant1 | ( quant2 << 4 ) );
69*2f083884Ss.makeev_local 	}
70*2f083884Ss.makeev_local }
71*2f083884Ss.makeev_local 
DecompressAlphaDxt3(u8 * rgba,void const * block)72*2f083884Ss.makeev_local void DecompressAlphaDxt3( u8* rgba, void const* block )
73*2f083884Ss.makeev_local {
74*2f083884Ss.makeev_local 	u8 const* bytes = reinterpret_cast< u8 const* >( block );
75*2f083884Ss.makeev_local 
76*2f083884Ss.makeev_local 	// unpack the alpha values pairwise
77*2f083884Ss.makeev_local 	for( int i = 0; i < 8; ++i )
78*2f083884Ss.makeev_local 	{
79*2f083884Ss.makeev_local 		// quantise down to 4 bits
80*2f083884Ss.makeev_local 		u8 quant = bytes[i];
81*2f083884Ss.makeev_local 
82*2f083884Ss.makeev_local 		// unpack the values
83*2f083884Ss.makeev_local 		u8 lo = quant & 0x0f;
84*2f083884Ss.makeev_local 		u8 hi = quant & 0xf0;
85*2f083884Ss.makeev_local 
86*2f083884Ss.makeev_local 		// convert back up to bytes
87*2f083884Ss.makeev_local 		rgba[8*i + 3] = lo | ( lo << 4 );
88*2f083884Ss.makeev_local 		rgba[8*i + 7] = hi | ( hi >> 4 );
89*2f083884Ss.makeev_local 	}
90*2f083884Ss.makeev_local }
91*2f083884Ss.makeev_local 
// Widens the range [min, max] in place until it spans at least steps,
// keeping both ends within 0..255.
//
//   min    lower end of the range, may be lowered
//   max    upper end of the range, may be raised
//   steps  smallest acceptable value of max - min
static void FixRange( int& min, int& max, int steps )
{
	// first try to make room by raising the upper end, stopping at 255
	if( max - min < steps )
		max = std::min( min + steps, 255 );

	// if the upper end hit 255 first, lower the bottom end instead,
	// stopping at 0
	if( max - min < steps )
		min = std::max( 0, max - steps );
}
99*2f083884Ss.makeev_local 
FitCodes(u8 const * rgba,int mask,u8 const * codes,u8 * indices)100*2f083884Ss.makeev_local static int FitCodes( u8 const* rgba, int mask, u8 const* codes, u8* indices )
101*2f083884Ss.makeev_local {
102*2f083884Ss.makeev_local 	// fit each alpha value to the codebook
103*2f083884Ss.makeev_local 	int err = 0;
104*2f083884Ss.makeev_local 	for( int i = 0; i < 16; ++i )
105*2f083884Ss.makeev_local 	{
106*2f083884Ss.makeev_local 		// check this pixel is valid
107*2f083884Ss.makeev_local 		int bit = 1 << i;
108*2f083884Ss.makeev_local 		if( ( mask & bit ) == 0 )
109*2f083884Ss.makeev_local 		{
110*2f083884Ss.makeev_local 			// use the first code
111*2f083884Ss.makeev_local 			indices[i] = 0;
112*2f083884Ss.makeev_local 			continue;
113*2f083884Ss.makeev_local 		}
114*2f083884Ss.makeev_local 
115*2f083884Ss.makeev_local 		// find the least error and corresponding index
116*2f083884Ss.makeev_local 		int value = rgba[4*i + 3];
117*2f083884Ss.makeev_local 		int least = 2000000000;//INT_MAX;
118*2f083884Ss.makeev_local 		int index = 0;
119*2f083884Ss.makeev_local 		for( int j = 0; j < 8; ++j )
120*2f083884Ss.makeev_local 		{
121*2f083884Ss.makeev_local 			// get the squared error from this code
122*2f083884Ss.makeev_local 			int dist = ( int )value - ( int )codes[j];
123*2f083884Ss.makeev_local 			dist *= dist;
124*2f083884Ss.makeev_local 
125*2f083884Ss.makeev_local 			// compare with the best so far
126*2f083884Ss.makeev_local 			if( dist < least )
127*2f083884Ss.makeev_local 			{
128*2f083884Ss.makeev_local 				least = dist;
129*2f083884Ss.makeev_local 				index = j;
130*2f083884Ss.makeev_local 			}
131*2f083884Ss.makeev_local 		}
132*2f083884Ss.makeev_local 
133*2f083884Ss.makeev_local 		// save this index and accumulate the error
134*2f083884Ss.makeev_local 		indices[i] = ( u8 )index;
135*2f083884Ss.makeev_local 		err += least;
136*2f083884Ss.makeev_local 	}
137*2f083884Ss.makeev_local 
138*2f083884Ss.makeev_local 	// return the total error
139*2f083884Ss.makeev_local 	return err;
140*2f083884Ss.makeev_local }
141*2f083884Ss.makeev_local 
WriteAlphaBlock(int alpha0,int alpha1,u8 const * indices,void * block)142*2f083884Ss.makeev_local static void WriteAlphaBlock( int alpha0, int alpha1, u8 const* indices, void* block )
143*2f083884Ss.makeev_local {
144*2f083884Ss.makeev_local 	u8* bytes = reinterpret_cast< u8* >( block );
145*2f083884Ss.makeev_local 
146*2f083884Ss.makeev_local 	// write the first two bytes
147*2f083884Ss.makeev_local 	bytes[0] = ( u8 )alpha0;
148*2f083884Ss.makeev_local 	bytes[1] = ( u8 )alpha1;
149*2f083884Ss.makeev_local 
150*2f083884Ss.makeev_local 	// pack the indices with 3 bits each
151*2f083884Ss.makeev_local 	u8* dest = bytes + 2;
152*2f083884Ss.makeev_local 	u8 const* src = indices;
153*2f083884Ss.makeev_local 	for( int i = 0; i < 2; ++i )
154*2f083884Ss.makeev_local 	{
155*2f083884Ss.makeev_local 		// pack 8 3-bit values
156*2f083884Ss.makeev_local 		int value = 0;
157*2f083884Ss.makeev_local 		for( int j = 0; j < 8; ++j )
158*2f083884Ss.makeev_local 		{
159*2f083884Ss.makeev_local 			int index = *src++;
160*2f083884Ss.makeev_local 			value |= ( index << 3*j );
161*2f083884Ss.makeev_local 		}
162*2f083884Ss.makeev_local 
163*2f083884Ss.makeev_local 		// store in 3 bytes
164*2f083884Ss.makeev_local 		for( int j = 0; j < 3; ++j )
165*2f083884Ss.makeev_local 		{
166*2f083884Ss.makeev_local 			int byte = ( value >> 8*j ) & 0xff;
167*2f083884Ss.makeev_local 			*dest++ = ( u8 )byte;
168*2f083884Ss.makeev_local 		}
169*2f083884Ss.makeev_local 	}
170*2f083884Ss.makeev_local }
171*2f083884Ss.makeev_local 
WriteAlphaBlock5(int alpha0,int alpha1,u8 const * indices,void * block)172*2f083884Ss.makeev_local static void WriteAlphaBlock5( int alpha0, int alpha1, u8 const* indices, void* block )
173*2f083884Ss.makeev_local {
174*2f083884Ss.makeev_local 	// check the relative values of the endpoints
175*2f083884Ss.makeev_local 	if( alpha0 > alpha1 )
176*2f083884Ss.makeev_local 	{
177*2f083884Ss.makeev_local 		// swap the indices
178*2f083884Ss.makeev_local 		u8 swapped[16];
179*2f083884Ss.makeev_local 		for( int i = 0; i < 16; ++i )
180*2f083884Ss.makeev_local 		{
181*2f083884Ss.makeev_local 			u8 index = indices[i];
182*2f083884Ss.makeev_local 			if( index == 0 )
183*2f083884Ss.makeev_local 				swapped[i] = 1;
184*2f083884Ss.makeev_local 			else if( index == 1 )
185*2f083884Ss.makeev_local 				swapped[i] = 0;
186*2f083884Ss.makeev_local 			else if( index <= 5 )
187*2f083884Ss.makeev_local 				swapped[i] = 7 - index;
188*2f083884Ss.makeev_local 			else
189*2f083884Ss.makeev_local 				swapped[i] = index;
190*2f083884Ss.makeev_local 		}
191*2f083884Ss.makeev_local 
192*2f083884Ss.makeev_local 		// write the block
193*2f083884Ss.makeev_local 		WriteAlphaBlock( alpha1, alpha0, swapped, block );
194*2f083884Ss.makeev_local 	}
195*2f083884Ss.makeev_local 	else
196*2f083884Ss.makeev_local 	{
197*2f083884Ss.makeev_local 		// write the block
198*2f083884Ss.makeev_local 		WriteAlphaBlock( alpha0, alpha1, indices, block );
199*2f083884Ss.makeev_local 	}
200*2f083884Ss.makeev_local }
201*2f083884Ss.makeev_local 
WriteAlphaBlock7(int alpha0,int alpha1,u8 const * indices,void * block)202*2f083884Ss.makeev_local static void WriteAlphaBlock7( int alpha0, int alpha1, u8 const* indices, void* block )
203*2f083884Ss.makeev_local {
204*2f083884Ss.makeev_local 	// check the relative values of the endpoints
205*2f083884Ss.makeev_local 	if( alpha0 < alpha1 )
206*2f083884Ss.makeev_local 	{
207*2f083884Ss.makeev_local 		// swap the indices
208*2f083884Ss.makeev_local 		u8 swapped[16];
209*2f083884Ss.makeev_local 		for( int i = 0; i < 16; ++i )
210*2f083884Ss.makeev_local 		{
211*2f083884Ss.makeev_local 			u8 index = indices[i];
212*2f083884Ss.makeev_local 			if( index == 0 )
213*2f083884Ss.makeev_local 				swapped[i] = 1;
214*2f083884Ss.makeev_local 			else if( index == 1 )
215*2f083884Ss.makeev_local 				swapped[i] = 0;
216*2f083884Ss.makeev_local 			else
217*2f083884Ss.makeev_local 				swapped[i] = 9 - index;
218*2f083884Ss.makeev_local 		}
219*2f083884Ss.makeev_local 
220*2f083884Ss.makeev_local 		// write the block
221*2f083884Ss.makeev_local 		WriteAlphaBlock( alpha1, alpha0, swapped, block );
222*2f083884Ss.makeev_local 	}
223*2f083884Ss.makeev_local 	else
224*2f083884Ss.makeev_local 	{
225*2f083884Ss.makeev_local 		// write the block
226*2f083884Ss.makeev_local 		WriteAlphaBlock( alpha0, alpha1, indices, block );
227*2f083884Ss.makeev_local 	}
228*2f083884Ss.makeev_local }
229*2f083884Ss.makeev_local 
CompressAlphaDxt5(u8 const * rgba,int mask,void * block)230*2f083884Ss.makeev_local void CompressAlphaDxt5( u8 const* rgba, int mask, void* block )
231*2f083884Ss.makeev_local {
232*2f083884Ss.makeev_local 	// get the range for 5-alpha and 7-alpha interpolation
233*2f083884Ss.makeev_local 	int min5 = 255;
234*2f083884Ss.makeev_local 	int max5 = 0;
235*2f083884Ss.makeev_local 	int min7 = 255;
236*2f083884Ss.makeev_local 	int max7 = 0;
237*2f083884Ss.makeev_local 	for( int i = 0; i < 16; ++i )
238*2f083884Ss.makeev_local 	{
239*2f083884Ss.makeev_local 		// check this pixel is valid
240*2f083884Ss.makeev_local 		int bit = 1 << i;
241*2f083884Ss.makeev_local 		if( ( mask & bit ) == 0 )
242*2f083884Ss.makeev_local 			continue;
243*2f083884Ss.makeev_local 
244*2f083884Ss.makeev_local 		// incorporate into the min/max
245*2f083884Ss.makeev_local 		int value = rgba[4*i + 3];
246*2f083884Ss.makeev_local 		if( value < min7 )
247*2f083884Ss.makeev_local 			min7 = value;
248*2f083884Ss.makeev_local 		if( value > max7 )
249*2f083884Ss.makeev_local 			max7 = value;
250*2f083884Ss.makeev_local 		if( value != 0 && value < min5 )
251*2f083884Ss.makeev_local 			min5 = value;
252*2f083884Ss.makeev_local 		if( value != 255 && value > max5 )
253*2f083884Ss.makeev_local 			max5 = value;
254*2f083884Ss.makeev_local 	}
255*2f083884Ss.makeev_local 
256*2f083884Ss.makeev_local 	// handle the case that no valid range was found
257*2f083884Ss.makeev_local 	if( min5 > max5 )
258*2f083884Ss.makeev_local 		min5 = max5;
259*2f083884Ss.makeev_local 	if( min7 > max7 )
260*2f083884Ss.makeev_local 		min7 = max7;
261*2f083884Ss.makeev_local 
262*2f083884Ss.makeev_local 	// fix the range to be the minimum in each case
263*2f083884Ss.makeev_local 	FixRange( min5, max5, 5 );
264*2f083884Ss.makeev_local 	FixRange( min7, max7, 7 );
265*2f083884Ss.makeev_local 
266*2f083884Ss.makeev_local 	// set up the 5-alpha code book
267*2f083884Ss.makeev_local 	u8 codes5[8];
268*2f083884Ss.makeev_local 	codes5[0] = ( u8 )min5;
269*2f083884Ss.makeev_local 	codes5[1] = ( u8 )max5;
270*2f083884Ss.makeev_local 	for( int i = 1; i < 5; ++i )
271*2f083884Ss.makeev_local 		codes5[1 + i] = ( u8 )( ( ( 5 - i )*min5 + i*max5 )/5 );
272*2f083884Ss.makeev_local 	codes5[6] = 0;
273*2f083884Ss.makeev_local 	codes5[7] = 255;
274*2f083884Ss.makeev_local 
275*2f083884Ss.makeev_local 	// set up the 7-alpha code book
276*2f083884Ss.makeev_local 	u8 codes7[8];
277*2f083884Ss.makeev_local 	codes7[0] = ( u8 )min7;
278*2f083884Ss.makeev_local 	codes7[1] = ( u8 )max7;
279*2f083884Ss.makeev_local 	for( int i = 1; i < 7; ++i )
280*2f083884Ss.makeev_local 		codes7[1 + i] = ( u8 )( ( ( 7 - i )*min7 + i*max7 )/7 );
281*2f083884Ss.makeev_local 
282*2f083884Ss.makeev_local 	// fit the data to both code books
283*2f083884Ss.makeev_local 	u8 indices5[16];
284*2f083884Ss.makeev_local 	u8 indices7[16];
285*2f083884Ss.makeev_local 	int err5 = FitCodes( rgba, mask, codes5, indices5 );
286*2f083884Ss.makeev_local 	int err7 = FitCodes( rgba, mask, codes7, indices7 );
287*2f083884Ss.makeev_local 
288*2f083884Ss.makeev_local 	// save the block with least error
289*2f083884Ss.makeev_local 	if( err5 <= err7 )
290*2f083884Ss.makeev_local 		WriteAlphaBlock5( min5, max5, indices5, block );
291*2f083884Ss.makeev_local 	else
292*2f083884Ss.makeev_local 		WriteAlphaBlock7( min7, max7, indices7, block );
293*2f083884Ss.makeev_local }
294*2f083884Ss.makeev_local 
DecompressAlphaDxt5(u8 * rgba,void const * block)295*2f083884Ss.makeev_local void DecompressAlphaDxt5( u8* rgba, void const* block )
296*2f083884Ss.makeev_local {
297*2f083884Ss.makeev_local 	// get the two alpha values
298*2f083884Ss.makeev_local 	u8 const* bytes = reinterpret_cast< u8 const* >( block );
299*2f083884Ss.makeev_local 	int alpha0 = bytes[0];
300*2f083884Ss.makeev_local 	int alpha1 = bytes[1];
301*2f083884Ss.makeev_local 
302*2f083884Ss.makeev_local 	// compare the values to build the codebook
303*2f083884Ss.makeev_local 	u8 codes[8];
304*2f083884Ss.makeev_local 	codes[0] = ( u8 )alpha0;
305*2f083884Ss.makeev_local 	codes[1] = ( u8 )alpha1;
306*2f083884Ss.makeev_local 	if( alpha0 <= alpha1 )
307*2f083884Ss.makeev_local 	{
308*2f083884Ss.makeev_local 		// use 5-alpha codebook
309*2f083884Ss.makeev_local 		for( int i = 1; i < 5; ++i )
310*2f083884Ss.makeev_local 			codes[1 + i] = ( u8 )( ( ( 5 - i )*alpha0 + i*alpha1 )/5 );
311*2f083884Ss.makeev_local 		codes[6] = 0;
312*2f083884Ss.makeev_local 		codes[7] = 255;
313*2f083884Ss.makeev_local 	}
314*2f083884Ss.makeev_local 	else
315*2f083884Ss.makeev_local 	{
316*2f083884Ss.makeev_local 		// use 7-alpha codebook
317*2f083884Ss.makeev_local 		for( int i = 1; i < 7; ++i )
318*2f083884Ss.makeev_local 			codes[1 + i] = ( u8 )( ( ( 7 - i )*alpha0 + i*alpha1 )/7 );
319*2f083884Ss.makeev_local 	}
320*2f083884Ss.makeev_local 
321*2f083884Ss.makeev_local 	// decode the indices
322*2f083884Ss.makeev_local 	u8 indices[16];
323*2f083884Ss.makeev_local 	u8 const* src = bytes + 2;
324*2f083884Ss.makeev_local 	u8* dest = indices;
325*2f083884Ss.makeev_local 	for( int i = 0; i < 2; ++i )
326*2f083884Ss.makeev_local 	{
327*2f083884Ss.makeev_local 		// grab 3 bytes
328*2f083884Ss.makeev_local 		int value = 0;
329*2f083884Ss.makeev_local 		for( int j = 0; j < 3; ++j )
330*2f083884Ss.makeev_local 		{
331*2f083884Ss.makeev_local 			int byte = *src++;
332*2f083884Ss.makeev_local 			value |= ( byte << 8*j );
333*2f083884Ss.makeev_local 		}
334*2f083884Ss.makeev_local 
335*2f083884Ss.makeev_local 		// unpack 8 3-bit values from it
336*2f083884Ss.makeev_local 		for( int j = 0; j < 8; ++j )
337*2f083884Ss.makeev_local 		{
338*2f083884Ss.makeev_local 			int index = ( value >> 3*j ) & 0x7;
339*2f083884Ss.makeev_local 			*dest++ = ( u8 )index;
340*2f083884Ss.makeev_local 		}
341*2f083884Ss.makeev_local 	}
342*2f083884Ss.makeev_local 
343*2f083884Ss.makeev_local 	// write out the indexed codebook values
344*2f083884Ss.makeev_local 	for( int i = 0; i < 16; ++i )
345*2f083884Ss.makeev_local 		rgba[4*i + 3] = codes[indices[i]];
346*2f083884Ss.makeev_local }
347*2f083884Ss.makeev_local 
348*2f083884Ss.makeev_local } // namespace squish
349