1*2f083884Ss.makeev_local /* -----------------------------------------------------------------------------
2*2f083884Ss.makeev_local 
3*2f083884Ss.makeev_local 	Copyright (c) 2006 Simon Brown                          [email protected]
4*2f083884Ss.makeev_local 	Copyright (c) 2007 Ignacio Castano                   [email protected]
5*2f083884Ss.makeev_local 
6*2f083884Ss.makeev_local 	Permission is hereby granted, free of charge, to any person obtaining
7*2f083884Ss.makeev_local 	a copy of this software and associated documentation files (the
8*2f083884Ss.makeev_local 	"Software"), to	deal in the Software without restriction, including
9*2f083884Ss.makeev_local 	without limitation the rights to use, copy, modify, merge, publish,
10*2f083884Ss.makeev_local 	distribute, sublicense, and/or sell copies of the Software, and to
11*2f083884Ss.makeev_local 	permit persons to whom the Software is furnished to do so, subject to
12*2f083884Ss.makeev_local 	the following conditions:
13*2f083884Ss.makeev_local 
14*2f083884Ss.makeev_local 	The above copyright notice and this permission notice shall be included
15*2f083884Ss.makeev_local 	in all copies or substantial portions of the Software.
16*2f083884Ss.makeev_local 
17*2f083884Ss.makeev_local 	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18*2f083884Ss.makeev_local 	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19*2f083884Ss.makeev_local 	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20*2f083884Ss.makeev_local 	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21*2f083884Ss.makeev_local 	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22*2f083884Ss.makeev_local 	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23*2f083884Ss.makeev_local 	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24*2f083884Ss.makeev_local 
25*2f083884Ss.makeev_local    -------------------------------------------------------------------------- */
26*2f083884Ss.makeev_local 
27*2f083884Ss.makeev_local #include "clusterfit.h"
28*2f083884Ss.makeev_local #include "colourset.h"
29*2f083884Ss.makeev_local #include "colourblock.h"
30*2f083884Ss.makeev_local #include <cfloat>
31*2f083884Ss.makeev_local 
32*2f083884Ss.makeev_local namespace squish {
33*2f083884Ss.makeev_local 
ClusterFit(ColourSet const * colours,int flags)34*2f083884Ss.makeev_local ClusterFit::ClusterFit( ColourSet const* colours, int flags )
35*2f083884Ss.makeev_local   : ColourFit( colours, flags )
36*2f083884Ss.makeev_local {
37*2f083884Ss.makeev_local 	// set the iteration count
38*2f083884Ss.makeev_local 	m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
39*2f083884Ss.makeev_local 
40*2f083884Ss.makeev_local 	// initialise the best error
41*2f083884Ss.makeev_local 	m_besterror = VEC4_CONST( FLT_MAX );
42*2f083884Ss.makeev_local 
43*2f083884Ss.makeev_local 	// initialise the metric
44*2f083884Ss.makeev_local 	bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
45*2f083884Ss.makeev_local 	if( perceptual )
46*2f083884Ss.makeev_local 		m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
47*2f083884Ss.makeev_local 	else
48*2f083884Ss.makeev_local 		m_metric = VEC4_CONST( 1.0f );
49*2f083884Ss.makeev_local 
50*2f083884Ss.makeev_local 	// cache some values
51*2f083884Ss.makeev_local 	int const count = m_colours->GetCount();
52*2f083884Ss.makeev_local 	Vec3 const* values = m_colours->GetPoints();
53*2f083884Ss.makeev_local 
54*2f083884Ss.makeev_local 	// get the covariance matrix
55*2f083884Ss.makeev_local 	Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
56*2f083884Ss.makeev_local 
57*2f083884Ss.makeev_local 	// compute the principle component
58*2f083884Ss.makeev_local 	m_principle = ComputePrincipleComponent( covariance );
59*2f083884Ss.makeev_local }
60*2f083884Ss.makeev_local 
ConstructOrdering(Vec3 const & axis,int iteration)61*2f083884Ss.makeev_local bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
62*2f083884Ss.makeev_local {
63*2f083884Ss.makeev_local 	// cache some values
64*2f083884Ss.makeev_local 	int const count = m_colours->GetCount();
65*2f083884Ss.makeev_local 	Vec3 const* values = m_colours->GetPoints();
66*2f083884Ss.makeev_local 
67*2f083884Ss.makeev_local 	// build the list of dot products
68*2f083884Ss.makeev_local 	float dps[16];
69*2f083884Ss.makeev_local 	u8* order = ( u8* )m_order + 16*iteration;
70*2f083884Ss.makeev_local 	for( int i = 0; i < count; ++i )
71*2f083884Ss.makeev_local 	{
72*2f083884Ss.makeev_local 		dps[i] = Dot( values[i], axis );
73*2f083884Ss.makeev_local 		order[i] = ( u8 )i;
74*2f083884Ss.makeev_local 	}
75*2f083884Ss.makeev_local 
76*2f083884Ss.makeev_local 	// stable sort using them
77*2f083884Ss.makeev_local 	for( int i = 0; i < count; ++i )
78*2f083884Ss.makeev_local 	{
79*2f083884Ss.makeev_local 		for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
80*2f083884Ss.makeev_local 		{
81*2f083884Ss.makeev_local 			std::swap( dps[j], dps[j - 1] );
82*2f083884Ss.makeev_local 			std::swap( order[j], order[j - 1] );
83*2f083884Ss.makeev_local 		}
84*2f083884Ss.makeev_local 	}
85*2f083884Ss.makeev_local 
86*2f083884Ss.makeev_local 	// check this ordering is unique
87*2f083884Ss.makeev_local 	for( int it = 0; it < iteration; ++it )
88*2f083884Ss.makeev_local 	{
89*2f083884Ss.makeev_local 		u8 const* prev = ( u8* )m_order + 16*it;
90*2f083884Ss.makeev_local 		bool same = true;
91*2f083884Ss.makeev_local 		for( int i = 0; i < count; ++i )
92*2f083884Ss.makeev_local 		{
93*2f083884Ss.makeev_local 			if( order[i] != prev[i] )
94*2f083884Ss.makeev_local 			{
95*2f083884Ss.makeev_local 				same = false;
96*2f083884Ss.makeev_local 				break;
97*2f083884Ss.makeev_local 			}
98*2f083884Ss.makeev_local 		}
99*2f083884Ss.makeev_local 		if( same )
100*2f083884Ss.makeev_local 			return false;
101*2f083884Ss.makeev_local 	}
102*2f083884Ss.makeev_local 
103*2f083884Ss.makeev_local 	// copy the ordering and weight all the points
104*2f083884Ss.makeev_local 	Vec3 const* unweighted = m_colours->GetPoints();
105*2f083884Ss.makeev_local 	float const* weights = m_colours->GetWeights();
106*2f083884Ss.makeev_local 	m_xsum_wsum = VEC4_CONST( 0.0f );
107*2f083884Ss.makeev_local 	for( int i = 0; i < count; ++i )
108*2f083884Ss.makeev_local 	{
109*2f083884Ss.makeev_local 		int j = order[i];
110*2f083884Ss.makeev_local 		Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
111*2f083884Ss.makeev_local 		Vec4 w( weights[j] );
112*2f083884Ss.makeev_local 		Vec4 x = p*w;
113*2f083884Ss.makeev_local 		m_points_weights[i] = x;
114*2f083884Ss.makeev_local 		m_xsum_wsum += x;
115*2f083884Ss.makeev_local 	}
116*2f083884Ss.makeev_local 	return true;
117*2f083884Ss.makeev_local }
118*2f083884Ss.makeev_local 
Compress3(void * block)119*2f083884Ss.makeev_local void ClusterFit::Compress3( void* block )
120*2f083884Ss.makeev_local {
121*2f083884Ss.makeev_local 	// declare variables
122*2f083884Ss.makeev_local 	int const count = m_colours->GetCount();
123*2f083884Ss.makeev_local 	Vec4 const two = VEC4_CONST( 2.0 );
124*2f083884Ss.makeev_local 	Vec4 const one = VEC4_CONST( 1.0f );
125*2f083884Ss.makeev_local 	Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
126*2f083884Ss.makeev_local 	Vec4 const zero = VEC4_CONST( 0.0f );
127*2f083884Ss.makeev_local 	Vec4 const half = VEC4_CONST( 0.5f );
128*2f083884Ss.makeev_local 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
129*2f083884Ss.makeev_local 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
130*2f083884Ss.makeev_local 
131*2f083884Ss.makeev_local 	// prepare an ordering using the principle axis
132*2f083884Ss.makeev_local 	ConstructOrdering( m_principle, 0 );
133*2f083884Ss.makeev_local 
134*2f083884Ss.makeev_local 	// check all possible clusters and iterate on the total order
135*2f083884Ss.makeev_local 	Vec4 beststart = VEC4_CONST( 0.0f );
136*2f083884Ss.makeev_local 	Vec4 bestend = VEC4_CONST( 0.0f );
137*2f083884Ss.makeev_local 	Vec4 besterror = m_besterror;
138*2f083884Ss.makeev_local 	u8 bestindices[16];
139*2f083884Ss.makeev_local 	int bestiteration = 0;
140*2f083884Ss.makeev_local 	int besti = 0, bestj = 0;
141*2f083884Ss.makeev_local 
142*2f083884Ss.makeev_local 	// loop over iterations (we avoid the case that all points in first or last cluster)
143*2f083884Ss.makeev_local 	for( int iterationIndex = 0;; )
144*2f083884Ss.makeev_local 	{
145*2f083884Ss.makeev_local 		// first cluster [0,i) is at the start
146*2f083884Ss.makeev_local 		Vec4 part0 = VEC4_CONST( 0.0f );
147*2f083884Ss.makeev_local 		for( int i = 0; i < count; ++i )
148*2f083884Ss.makeev_local 		{
149*2f083884Ss.makeev_local 			// second cluster [i,j) is half along
150*2f083884Ss.makeev_local 			Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
151*2f083884Ss.makeev_local 			int jmin = ( i == 0 ) ? 1 : i;
152*2f083884Ss.makeev_local 			for( int j = jmin;; )
153*2f083884Ss.makeev_local 			{
154*2f083884Ss.makeev_local 				// last cluster [j,count) is at the end
155*2f083884Ss.makeev_local 				Vec4 part2 = m_xsum_wsum - part1 - part0;
156*2f083884Ss.makeev_local 
157*2f083884Ss.makeev_local 				// compute least squares terms directly
158*2f083884Ss.makeev_local 				Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
159*2f083884Ss.makeev_local 				Vec4 alpha2_sum = alphax_sum.SplatW();
160*2f083884Ss.makeev_local 
161*2f083884Ss.makeev_local 				Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
162*2f083884Ss.makeev_local 				Vec4 beta2_sum = betax_sum.SplatW();
163*2f083884Ss.makeev_local 
164*2f083884Ss.makeev_local 				Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
165*2f083884Ss.makeev_local 
166*2f083884Ss.makeev_local 				// compute the least-squares optimal points
167*2f083884Ss.makeev_local 				Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
168*2f083884Ss.makeev_local 				Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
169*2f083884Ss.makeev_local 				Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
170*2f083884Ss.makeev_local 
171*2f083884Ss.makeev_local 				// clamp to the grid
172*2f083884Ss.makeev_local 				a = Min( one, Max( zero, a ) );
173*2f083884Ss.makeev_local 				b = Min( one, Max( zero, b ) );
174*2f083884Ss.makeev_local 				a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
175*2f083884Ss.makeev_local 				b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
176*2f083884Ss.makeev_local 
177*2f083884Ss.makeev_local 				// compute the error (we skip the constant xxsum)
178*2f083884Ss.makeev_local 				Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
179*2f083884Ss.makeev_local 				Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
180*2f083884Ss.makeev_local 				Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
181*2f083884Ss.makeev_local 				Vec4 e4 = MultiplyAdd( two, e3, e1 );
182*2f083884Ss.makeev_local 
183*2f083884Ss.makeev_local 				// apply the metric to the error term
184*2f083884Ss.makeev_local 				Vec4 e5 = e4*m_metric;
185*2f083884Ss.makeev_local 				Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
186*2f083884Ss.makeev_local 
187*2f083884Ss.makeev_local 				// keep the solution if it wins
188*2f083884Ss.makeev_local 				if( CompareAnyLessThan( error, besterror ) )
189*2f083884Ss.makeev_local 				{
190*2f083884Ss.makeev_local 					beststart = a;
191*2f083884Ss.makeev_local 					bestend = b;
192*2f083884Ss.makeev_local 					besti = i;
193*2f083884Ss.makeev_local 					bestj = j;
194*2f083884Ss.makeev_local 					besterror = error;
195*2f083884Ss.makeev_local 					bestiteration = iterationIndex;
196*2f083884Ss.makeev_local 				}
197*2f083884Ss.makeev_local 
198*2f083884Ss.makeev_local 				// advance
199*2f083884Ss.makeev_local 				if( j == count )
200*2f083884Ss.makeev_local 					break;
201*2f083884Ss.makeev_local 				part1 += m_points_weights[j];
202*2f083884Ss.makeev_local 				++j;
203*2f083884Ss.makeev_local 			}
204*2f083884Ss.makeev_local 
205*2f083884Ss.makeev_local 			// advance
206*2f083884Ss.makeev_local 			part0 += m_points_weights[i];
207*2f083884Ss.makeev_local 		}
208*2f083884Ss.makeev_local 
209*2f083884Ss.makeev_local 		// stop if we didn't improve in this iteration
210*2f083884Ss.makeev_local 		if( bestiteration != iterationIndex )
211*2f083884Ss.makeev_local 			break;
212*2f083884Ss.makeev_local 
213*2f083884Ss.makeev_local 		// advance if possible
214*2f083884Ss.makeev_local 		++iterationIndex;
215*2f083884Ss.makeev_local 		if( iterationIndex == m_iterationCount )
216*2f083884Ss.makeev_local 			break;
217*2f083884Ss.makeev_local 
218*2f083884Ss.makeev_local 		// stop if a new iteration is an ordering that has already been tried
219*2f083884Ss.makeev_local 		Vec3 axis = ( bestend - beststart ).GetVec3();
220*2f083884Ss.makeev_local 		if( !ConstructOrdering( axis, iterationIndex ) )
221*2f083884Ss.makeev_local 			break;
222*2f083884Ss.makeev_local 	}
223*2f083884Ss.makeev_local 
224*2f083884Ss.makeev_local 	// save the block if necessary
225*2f083884Ss.makeev_local 	if( CompareAnyLessThan( besterror, m_besterror ) )
226*2f083884Ss.makeev_local 	{
227*2f083884Ss.makeev_local 		// remap the indices
228*2f083884Ss.makeev_local 		u8 const* order = ( u8* )m_order + 16*bestiteration;
229*2f083884Ss.makeev_local 
230*2f083884Ss.makeev_local 		u8 unordered[16];
231*2f083884Ss.makeev_local 		for( int m = 0; m < besti; ++m )
232*2f083884Ss.makeev_local 			unordered[order[m]] = 0;
233*2f083884Ss.makeev_local 		for( int m = besti; m < bestj; ++m )
234*2f083884Ss.makeev_local 			unordered[order[m]] = 2;
235*2f083884Ss.makeev_local 		for( int m = bestj; m < count; ++m )
236*2f083884Ss.makeev_local 			unordered[order[m]] = 1;
237*2f083884Ss.makeev_local 
238*2f083884Ss.makeev_local 		m_colours->RemapIndices( unordered, bestindices );
239*2f083884Ss.makeev_local 
240*2f083884Ss.makeev_local 		// save the block
241*2f083884Ss.makeev_local 		WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
242*2f083884Ss.makeev_local 
243*2f083884Ss.makeev_local 		// save the error
244*2f083884Ss.makeev_local 		m_besterror = besterror;
245*2f083884Ss.makeev_local 	}
246*2f083884Ss.makeev_local }
247*2f083884Ss.makeev_local 
Compress4(void * block)248*2f083884Ss.makeev_local void ClusterFit::Compress4( void* block )
249*2f083884Ss.makeev_local {
250*2f083884Ss.makeev_local 	// declare variables
251*2f083884Ss.makeev_local 	int const count = m_colours->GetCount();
252*2f083884Ss.makeev_local 	Vec4 const two = VEC4_CONST( 2.0f );
253*2f083884Ss.makeev_local 	Vec4 const one = VEC4_CONST( 1.0f );
254*2f083884Ss.makeev_local 	Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
255*2f083884Ss.makeev_local 	Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
256*2f083884Ss.makeev_local 	Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
257*2f083884Ss.makeev_local 	Vec4 const zero = VEC4_CONST( 0.0f );
258*2f083884Ss.makeev_local 	Vec4 const half = VEC4_CONST( 0.5f );
259*2f083884Ss.makeev_local 	Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
260*2f083884Ss.makeev_local 	Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
261*2f083884Ss.makeev_local 
262*2f083884Ss.makeev_local 	// prepare an ordering using the principle axis
263*2f083884Ss.makeev_local 	ConstructOrdering( m_principle, 0 );
264*2f083884Ss.makeev_local 
265*2f083884Ss.makeev_local 	// check all possible clusters and iterate on the total order
266*2f083884Ss.makeev_local 	Vec4 beststart = VEC4_CONST( 0.0f );
267*2f083884Ss.makeev_local 	Vec4 bestend = VEC4_CONST( 0.0f );
268*2f083884Ss.makeev_local 	Vec4 besterror = m_besterror;
269*2f083884Ss.makeev_local 	u8 bestindices[16];
270*2f083884Ss.makeev_local 	int bestiteration = 0;
271*2f083884Ss.makeev_local 	int besti = 0, bestj = 0, bestk = 0;
272*2f083884Ss.makeev_local 
273*2f083884Ss.makeev_local 	// loop over iterations (we avoid the case that all points in first or last cluster)
274*2f083884Ss.makeev_local 	for( int iterationIndex = 0;; )
275*2f083884Ss.makeev_local 	{
276*2f083884Ss.makeev_local 		// first cluster [0,i) is at the start
277*2f083884Ss.makeev_local 		Vec4 part0 = VEC4_CONST( 0.0f );
278*2f083884Ss.makeev_local 		for( int i = 0; i < count; ++i )
279*2f083884Ss.makeev_local 		{
280*2f083884Ss.makeev_local 			// second cluster [i,j) is one third along
281*2f083884Ss.makeev_local 			Vec4 part1 = VEC4_CONST( 0.0f );
282*2f083884Ss.makeev_local 			for( int j = i;; )
283*2f083884Ss.makeev_local 			{
284*2f083884Ss.makeev_local 				// third cluster [j,k) is two thirds along
285*2f083884Ss.makeev_local 				Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
286*2f083884Ss.makeev_local 				int kmin = ( j == 0 ) ? 1 : j;
287*2f083884Ss.makeev_local 				for( int k = kmin;; )
288*2f083884Ss.makeev_local 				{
289*2f083884Ss.makeev_local 					// last cluster [k,count) is at the end
290*2f083884Ss.makeev_local 					Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
291*2f083884Ss.makeev_local 
292*2f083884Ss.makeev_local 					// compute least squares terms directly
293*2f083884Ss.makeev_local 					Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
294*2f083884Ss.makeev_local 					Vec4 const alpha2_sum = alphax_sum.SplatW();
295*2f083884Ss.makeev_local 
296*2f083884Ss.makeev_local 					Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
297*2f083884Ss.makeev_local 					Vec4 const beta2_sum = betax_sum.SplatW();
298*2f083884Ss.makeev_local 
299*2f083884Ss.makeev_local 					Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
300*2f083884Ss.makeev_local 
301*2f083884Ss.makeev_local 					// compute the least-squares optimal points
302*2f083884Ss.makeev_local 					Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
303*2f083884Ss.makeev_local 					Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
304*2f083884Ss.makeev_local 					Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
305*2f083884Ss.makeev_local 
306*2f083884Ss.makeev_local 					// clamp to the grid
307*2f083884Ss.makeev_local 					a = Min( one, Max( zero, a ) );
308*2f083884Ss.makeev_local 					b = Min( one, Max( zero, b ) );
309*2f083884Ss.makeev_local 					a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
310*2f083884Ss.makeev_local 					b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
311*2f083884Ss.makeev_local 
312*2f083884Ss.makeev_local 					// compute the error (we skip the constant xxsum)
313*2f083884Ss.makeev_local 					Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
314*2f083884Ss.makeev_local 					Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
315*2f083884Ss.makeev_local 					Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
316*2f083884Ss.makeev_local 					Vec4 e4 = MultiplyAdd( two, e3, e1 );
317*2f083884Ss.makeev_local 
318*2f083884Ss.makeev_local 					// apply the metric to the error term
319*2f083884Ss.makeev_local 					Vec4 e5 = e4*m_metric;
320*2f083884Ss.makeev_local 					Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
321*2f083884Ss.makeev_local 
322*2f083884Ss.makeev_local 					// keep the solution if it wins
323*2f083884Ss.makeev_local 					if( CompareAnyLessThan( error, besterror ) )
324*2f083884Ss.makeev_local 					{
325*2f083884Ss.makeev_local 						beststart = a;
326*2f083884Ss.makeev_local 						bestend = b;
327*2f083884Ss.makeev_local 						besterror = error;
328*2f083884Ss.makeev_local 						besti = i;
329*2f083884Ss.makeev_local 						bestj = j;
330*2f083884Ss.makeev_local 						bestk = k;
331*2f083884Ss.makeev_local 						bestiteration = iterationIndex;
332*2f083884Ss.makeev_local 					}
333*2f083884Ss.makeev_local 
334*2f083884Ss.makeev_local 					// advance
335*2f083884Ss.makeev_local 					if( k == count )
336*2f083884Ss.makeev_local 						break;
337*2f083884Ss.makeev_local 					part2 += m_points_weights[k];
338*2f083884Ss.makeev_local 					++k;
339*2f083884Ss.makeev_local 				}
340*2f083884Ss.makeev_local 
341*2f083884Ss.makeev_local 				// advance
342*2f083884Ss.makeev_local 				if( j == count )
343*2f083884Ss.makeev_local 					break;
344*2f083884Ss.makeev_local 				part1 += m_points_weights[j];
345*2f083884Ss.makeev_local 				++j;
346*2f083884Ss.makeev_local 			}
347*2f083884Ss.makeev_local 
348*2f083884Ss.makeev_local 			// advance
349*2f083884Ss.makeev_local 			part0 += m_points_weights[i];
350*2f083884Ss.makeev_local 		}
351*2f083884Ss.makeev_local 
352*2f083884Ss.makeev_local 		// stop if we didn't improve in this iteration
353*2f083884Ss.makeev_local 		if( bestiteration != iterationIndex )
354*2f083884Ss.makeev_local 			break;
355*2f083884Ss.makeev_local 
356*2f083884Ss.makeev_local 		// advance if possible
357*2f083884Ss.makeev_local 		++iterationIndex;
358*2f083884Ss.makeev_local 		if( iterationIndex == m_iterationCount )
359*2f083884Ss.makeev_local 			break;
360*2f083884Ss.makeev_local 
361*2f083884Ss.makeev_local 		// stop if a new iteration is an ordering that has already been tried
362*2f083884Ss.makeev_local 		Vec3 axis = ( bestend - beststart ).GetVec3();
363*2f083884Ss.makeev_local 		if( !ConstructOrdering( axis, iterationIndex ) )
364*2f083884Ss.makeev_local 			break;
365*2f083884Ss.makeev_local 	}
366*2f083884Ss.makeev_local 
367*2f083884Ss.makeev_local 	// save the block if necessary
368*2f083884Ss.makeev_local 	if( CompareAnyLessThan( besterror, m_besterror ) )
369*2f083884Ss.makeev_local 	{
370*2f083884Ss.makeev_local 		// remap the indices
371*2f083884Ss.makeev_local 		u8 const* order = ( u8* )m_order + 16*bestiteration;
372*2f083884Ss.makeev_local 
373*2f083884Ss.makeev_local 		u8 unordered[16];
374*2f083884Ss.makeev_local 		for( int m = 0; m < besti; ++m )
375*2f083884Ss.makeev_local 			unordered[order[m]] = 0;
376*2f083884Ss.makeev_local 		for( int m = besti; m < bestj; ++m )
377*2f083884Ss.makeev_local 			unordered[order[m]] = 2;
378*2f083884Ss.makeev_local 		for( int m = bestj; m < bestk; ++m )
379*2f083884Ss.makeev_local 			unordered[order[m]] = 3;
380*2f083884Ss.makeev_local 		for( int m = bestk; m < count; ++m )
381*2f083884Ss.makeev_local 			unordered[order[m]] = 1;
382*2f083884Ss.makeev_local 
383*2f083884Ss.makeev_local 		m_colours->RemapIndices( unordered, bestindices );
384*2f083884Ss.makeev_local 
385*2f083884Ss.makeev_local 		// save the block
386*2f083884Ss.makeev_local 		WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
387*2f083884Ss.makeev_local 
388*2f083884Ss.makeev_local 		// save the error
389*2f083884Ss.makeev_local 		m_besterror = besterror;
390*2f083884Ss.makeev_local 	}
391*2f083884Ss.makeev_local }
392*2f083884Ss.makeev_local 
393*2f083884Ss.makeev_local } // namespace squish
394