1 /* ----------------------------------------------------------------------------- 2 3 Copyright (c) 2006 Simon Brown [email protected] 4 Copyright (c) 2007 Ignacio Castano [email protected] 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice shall be included 15 in all copies or substantial portions of the Software. 16 17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 25 -------------------------------------------------------------------------- */ 26 27 #include "clusterfit.h" 28 #include "colourset.h" 29 #include "colourblock.h" 30 #include <cfloat> 31 32 namespace squish { 33 34 ClusterFit::ClusterFit( ColourSet const* colours, int flags ) 35 : ColourFit( colours, flags ) 36 { 37 // set the iteration count 38 m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1; 39 40 // initialise the best error 41 m_besterror = VEC4_CONST( FLT_MAX ); 42 43 // initialise the metric 44 bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 ); 45 if( perceptual ) 46 m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f ); 47 else 48 m_metric = VEC4_CONST( 1.0f ); 49 50 // cache some values 51 int const count = m_colours->GetCount(); 52 Vec3 const* values = m_colours->GetPoints(); 53 54 // get the covariance matrix 55 Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() ); 56 57 // compute the principle component 58 m_principle = ComputePrincipleComponent( covariance ); 59 } 60 61 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration ) 62 { 63 // cache some values 64 int const count = m_colours->GetCount(); 65 Vec3 const* values = m_colours->GetPoints(); 66 67 // build the list of dot products 68 float dps[16]; 69 u8* order = ( u8* )m_order + 16*iteration; 70 for( int i = 0; i < count; ++i ) 71 { 72 dps[i] = Dot( values[i], axis ); 73 order[i] = ( u8 )i; 74 } 75 76 // stable sort using them 77 for( int i = 0; i < count; ++i ) 78 { 79 for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j ) 80 { 81 std::swap( dps[j], dps[j - 1] ); 82 std::swap( order[j], order[j - 1] ); 83 } 84 } 85 86 // check this ordering is unique 87 for( int it = 0; it < iteration; ++it ) 88 { 89 u8 const* prev = ( u8* )m_order + 16*it; 90 bool same = true; 91 for( int i = 0; i < count; ++i ) 92 { 93 if( order[i] != prev[i] ) 94 { 95 same = false; 96 break; 97 } 98 } 99 if( same ) 100 return false; 101 } 102 103 // copy the ordering and weight all the points 104 Vec3 const* unweighted = m_colours->GetPoints(); 105 float const* weights = m_colours->GetWeights(); 106 m_xsum_wsum = VEC4_CONST( 0.0f ); 107 for( int i = 0; i < count; ++i ) 108 { 109 int j = order[i]; 110 Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f ); 111 Vec4 w( weights[j] ); 112 Vec4 x = p*w; 113 m_points_weights[i] = x; 114 m_xsum_wsum += x; 115 } 116 return true; 117 } 118 119 void ClusterFit::Compress3( void* block ) 120 { 121 // declare variables 122 int const count = m_colours->GetCount(); 123 Vec4 const two = VEC4_CONST( 2.0 ); 124 Vec4 const one = VEC4_CONST( 1.0f ); 125 Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f ); 126 Vec4 const zero = VEC4_CONST( 0.0f ); 127 Vec4 const half = VEC4_CONST( 0.5f ); 128 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); 129 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); 130 131 // prepare an ordering using the principle axis 132 ConstructOrdering( m_principle, 0 ); 133 134 // check all possible clusters and iterate on the total order 135 Vec4 beststart = VEC4_CONST( 0.0f ); 136 Vec4 bestend = VEC4_CONST( 0.0f ); 137 Vec4 besterror = m_besterror; 138 u8 bestindices[16]; 139 int bestiteration = 0; 140 int besti = 0, bestj = 0; 141 142 // loop over iterations (we avoid the case that all points in first or last cluster) 143 for( int iterationIndex = 0;; ) 144 { 145 // first cluster [0,i) is at the start 146 Vec4 part0 = VEC4_CONST( 0.0f ); 147 for( int i = 0; i < count; ++i ) 148 { 149 // second cluster [i,j) is half along 150 Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f ); 151 int jmin = ( i == 0 ) ? 1 : i; 152 for( int j = jmin;; ) 153 { 154 // last cluster [j,count) is at the end 155 Vec4 part2 = m_xsum_wsum - part1 - part0; 156 157 // compute least squares terms directly 158 Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 ); 159 Vec4 alpha2_sum = alphax_sum.SplatW(); 160 161 Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 ); 162 Vec4 beta2_sum = betax_sum.SplatW(); 163 164 Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW(); 165 166 // compute the least-squares optimal points 167 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) ); 168 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor; 169 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor; 170 171 // clamp to the grid 172 a = Min( one, Max( zero, a ) ); 173 b = Min( one, Max( zero, b ) ); 174 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp; 175 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp; 176 177 // compute the error (we skip the constant xxsum) 178 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); 179 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); 180 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); 181 Vec4 e4 = MultiplyAdd( two, e3, e1 ); 182 183 // apply the metric to the error term 184 Vec4 e5 = e4*m_metric; 185 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); 186 187 // keep the solution if it wins 188 if( CompareAnyLessThan( error, besterror ) ) 189 { 190 beststart = a; 191 bestend = b; 192 besti = i; 193 bestj = j; 194 besterror = error; 195 bestiteration = iterationIndex; 196 } 197 198 // advance 199 if( j == count ) 200 break; 201 part1 += m_points_weights[j]; 202 ++j; 203 } 204 205 // advance 206 part0 += m_points_weights[i]; 207 } 208 209 // stop if we didn't improve in this iteration 210 if( bestiteration != iterationIndex ) 211 break; 212 213 // advance if possible 214 ++iterationIndex; 215 if( iterationIndex == m_iterationCount ) 216 break; 217 218 // stop if a new iteration is an ordering that has already been tried 219 Vec3 axis = ( bestend - beststart ).GetVec3(); 220 if( !ConstructOrdering( axis, iterationIndex ) ) 221 break; 222 } 223 224 // save the block if necessary 225 if( CompareAnyLessThan( besterror, m_besterror ) ) 226 { 227 // remap the indices 228 u8 const* order = ( u8* )m_order + 16*bestiteration; 229 230 u8 unordered[16]; 231 for( int m = 0; m < besti; ++m ) 232 unordered[order[m]] = 0; 233 for( int m = besti; m < bestj; ++m ) 234 unordered[order[m]] = 2; 235 for( int m = bestj; m < count; ++m ) 236 unordered[order[m]] = 1; 237 238 m_colours->RemapIndices( unordered, bestindices ); 239 240 // save the block 241 WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); 242 243 // save the error 244 m_besterror = besterror; 245 } 246 } 247 248 void ClusterFit::Compress4( void* block ) 249 { 250 // declare variables 251 int const count = m_colours->GetCount(); 252 Vec4 const two = VEC4_CONST( 2.0f ); 253 Vec4 const one = VEC4_CONST( 1.0f ); 254 Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); 255 Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); 256 Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f ); 257 Vec4 const zero = VEC4_CONST( 0.0f ); 258 Vec4 const half = VEC4_CONST( 0.5f ); 259 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f ); 260 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); 261 262 // prepare an ordering using the principle axis 263 ConstructOrdering( m_principle, 0 ); 264 265 // check all possible clusters and iterate on the total order 266 Vec4 beststart = VEC4_CONST( 0.0f ); 267 Vec4 bestend = VEC4_CONST( 0.0f ); 268 Vec4 besterror = m_besterror; 269 u8 bestindices[16]; 270 int bestiteration = 0; 271 int besti = 0, bestj = 0, bestk = 0; 272 273 // loop over iterations (we avoid the case that all points in first or last cluster) 274 for( int iterationIndex = 0;; ) 275 { 276 // first cluster [0,i) is at the start 277 Vec4 part0 = VEC4_CONST( 0.0f ); 278 for( int i = 0; i < count; ++i ) 279 { 280 // second cluster [i,j) is one third along 281 Vec4 part1 = VEC4_CONST( 0.0f ); 282 for( int j = i;; ) 283 { 284 // third cluster [j,k) is two thirds along 285 Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f ); 286 int kmin = ( j == 0 ) ? 1 : j; 287 for( int k = kmin;; ) 288 { 289 // last cluster [k,count) is at the end 290 Vec4 part3 = m_xsum_wsum - part2 - part1 - part0; 291 292 // compute least squares terms directly 293 Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) ); 294 Vec4 const alpha2_sum = alphax_sum.SplatW(); 295 296 Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) ); 297 Vec4 const beta2_sum = betax_sum.SplatW(); 298 299 Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW(); 300 301 // compute the least-squares optimal points 302 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) ); 303 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor; 304 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor; 305 306 // clamp to the grid 307 a = Min( one, Max( zero, a ) ); 308 b = Min( one, Max( zero, b ) ); 309 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp; 310 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp; 311 312 // compute the error (we skip the constant xxsum) 313 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum ); 314 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum ); 315 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 ); 316 Vec4 e4 = MultiplyAdd( two, e3, e1 ); 317 318 // apply the metric to the error term 319 Vec4 e5 = e4*m_metric; 320 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ(); 321 322 // keep the solution if it wins 323 if( CompareAnyLessThan( error, besterror ) ) 324 { 325 beststart = a; 326 bestend = b; 327 besterror = error; 328 besti = i; 329 bestj = j; 330 bestk = k; 331 bestiteration = iterationIndex; 332 } 333 334 // advance 335 if( k == count ) 336 break; 337 part2 += m_points_weights[k]; 338 ++k; 339 } 340 341 // advance 342 if( j == count ) 343 break; 344 part1 += m_points_weights[j]; 345 ++j; 346 } 347 348 // advance 349 part0 += m_points_weights[i]; 350 } 351 352 // stop if we didn't improve in this iteration 353 if( bestiteration != iterationIndex ) 354 break; 355 356 // advance if possible 357 ++iterationIndex; 358 if( iterationIndex == m_iterationCount ) 359 break; 360 361 // stop if a new iteration is an ordering that has already been tried 362 Vec3 axis = ( bestend - beststart ).GetVec3(); 363 if( !ConstructOrdering( axis, iterationIndex ) ) 364 break; 365 } 366 367 // save the block if necessary 368 if( CompareAnyLessThan( besterror, m_besterror ) ) 369 { 370 // remap the indices 371 u8 const* order = ( u8* )m_order + 16*bestiteration; 372 373 u8 unordered[16]; 374 for( int m = 0; m < besti; ++m ) 375 unordered[order[m]] = 0; 376 for( int m = besti; m < bestj; ++m ) 377 unordered[order[m]] = 2; 378 for( int m = bestj; m < bestk; ++m ) 379 unordered[order[m]] = 3; 380 for( int m = bestk; m < count; ++m ) 381 unordered[order[m]] = 1; 382 383 m_colours->RemapIndices( unordered, bestindices ); 384 385 // save the block 386 WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block ); 387 388 // save the error 389 m_besterror = besterror; 390 } 391 } 392 393 } // namespace squish 394