1 /* 2 ** 2008 February 16 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** This file implements an object that represents a fixed-length 13 ** bitmap. Bits are numbered starting with 1. 14 ** 15 ** A bitmap is used to record which pages of a database file have been 16 ** journalled during a transaction, or which pages have the "dont-write" 17 ** property. Usually only a few pages are meet either condition. 18 ** So the bitmap is usually sparse and has low cardinality. 19 ** But sometimes (for example when during a DROP of a large table) most 20 ** or all of the pages in a database can get journalled. In those cases, 21 ** the bitmap becomes dense with high cardinality. The algorithm needs 22 ** to handle both cases well. 23 ** 24 ** The size of the bitmap is fixed when the object is created. 25 ** 26 ** All bits are clear when the bitmap is created. Individual bits 27 ** may be set or cleared one at a time. 28 ** 29 ** Test operations are about 100 times more common that set operations. 30 ** Clear operations are exceedingly rare. There are usually between 31 ** 5 and 500 set operations per Bitvec object, though the number of sets can 32 ** sometimes grow into tens of thousands or larger. The size of the 33 ** Bitvec object is the number of pages in the database file at the 34 ** start of a transaction, and is thus usually less than a few thousand, 35 ** but can be as large as 2 billion for a really big database. 36 ** 37 ** @(#) $Id: bitvec.c,v 1.14 2009/04/01 23:49:04 drh Exp $ 38 */ 39 #include "sqliteInt.h" 40 41 /* Size of the Bitvec structure in bytes. */ 42 #define BITVEC_SZ 512 43 44 /* Round the union size down to the nearest pointer boundary, since that's how 45 ** it will be aligned within the Bitvec struct. */ 46 #define BITVEC_USIZE (((BITVEC_SZ-(3*sizeof(u32)))/sizeof(Bitvec*))*sizeof(Bitvec*)) 47 48 /* Type of the array "element" for the bitmap representation. 49 ** Should be a power of 2, and ideally, evenly divide into BITVEC_USIZE. 50 ** Setting this to the "natural word" size of your CPU may improve 51 ** performance. */ 52 #define BITVEC_TELEM u8 53 /* Size, in bits, of the bitmap element. */ 54 #define BITVEC_SZELEM 8 55 /* Number of elements in a bitmap array. */ 56 #define BITVEC_NELEM (BITVEC_USIZE/sizeof(BITVEC_TELEM)) 57 /* Number of bits in the bitmap array. */ 58 #define BITVEC_NBIT (BITVEC_NELEM*BITVEC_SZELEM) 59 60 /* Number of u32 values in hash table. */ 61 #define BITVEC_NINT (BITVEC_USIZE/sizeof(u32)) 62 /* Maximum number of entries in hash table before 63 ** sub-dividing and re-hashing. */ 64 #define BITVEC_MXHASH (BITVEC_NINT/2) 65 /* Hashing function for the aHash representation. 66 ** Empirical testing showed that the *37 multiplier 67 ** (an arbitrary prime)in the hash function provided 68 ** no fewer collisions than the no-op *1. */ 69 #define BITVEC_HASH(X) (((X)*1)%BITVEC_NINT) 70 71 #define BITVEC_NPTR (BITVEC_USIZE/sizeof(Bitvec *)) 72 73 74 /* 75 ** A bitmap is an instance of the following structure. 76 ** 77 ** This bitmap records the existance of zero or more bits 78 ** with values between 1 and iSize, inclusive. 79 ** 80 ** There are three possible representations of the bitmap. 81 ** If iSize<=BITVEC_NBIT, then Bitvec.u.aBitmap[] is a straight 82 ** bitmap. The least significant bit is bit 1. 83 ** 84 ** If iSize>BITVEC_NBIT and iDivisor==0 then Bitvec.u.aHash[] is 85 ** a hash table that will hold up to BITVEC_MXHASH distinct values. 86 ** 87 ** Otherwise, the value i is redirected into one of BITVEC_NPTR 88 ** sub-bitmaps pointed to by Bitvec.u.apSub[]. Each subbitmap 89 ** handles up to iDivisor separate values of i. apSub[0] holds 90 ** values between 1 and iDivisor. apSub[1] holds values between 91 ** iDivisor+1 and 2*iDivisor. apSub[N] holds values between 92 ** N*iDivisor+1 and (N+1)*iDivisor. Each subbitmap is normalized 93 ** to hold deal with values between 1 and iDivisor. 94 */ 95 struct Bitvec { 96 u32 iSize; /* Maximum bit index. Max iSize is 4,294,967,296. */ 97 u32 nSet; /* Number of bits that are set - only valid for aHash 98 ** element. Max is BITVEC_NINT. For BITVEC_SZ of 512, 99 ** this would be 125. */ 100 u32 iDivisor; /* Number of bits handled by each apSub[] entry. */ 101 /* Should >=0 for apSub element. */ 102 /* Max iDivisor is max(u32) / BITVEC_NPTR + 1. */ 103 /* For a BITVEC_SZ of 512, this would be 34,359,739. */ 104 union { 105 BITVEC_TELEM aBitmap[BITVEC_NELEM]; /* Bitmap representation */ 106 u32 aHash[BITVEC_NINT]; /* Hash table representation */ 107 Bitvec *apSub[BITVEC_NPTR]; /* Recursive representation */ 108 } u; 109 }; 110 111 /* 112 ** Create a new bitmap object able to handle bits between 0 and iSize, 113 ** inclusive. Return a pointer to the new object. Return NULL if 114 ** malloc fails. 115 */ 116 Bitvec *sqlite3BitvecCreate(u32 iSize){ 117 Bitvec *p; 118 assert( sizeof(*p)==BITVEC_SZ ); 119 p = sqlite3MallocZero( sizeof(*p) ); 120 if( p ){ 121 p->iSize = iSize; 122 } 123 return p; 124 } 125 126 /* 127 ** Check to see if the i-th bit is set. Return true or false. 128 ** If p is NULL (if the bitmap has not been created) or if 129 ** i is out of range, then return false. 130 */ 131 int sqlite3BitvecTest(Bitvec *p, u32 i){ 132 if( p==0 ) return 0; 133 if( i>p->iSize || i==0 ) return 0; 134 i--; 135 while( p->iDivisor ){ 136 u32 bin = i/p->iDivisor; 137 i = i%p->iDivisor; 138 p = p->u.apSub[bin]; 139 if (!p) { 140 return 0; 141 } 142 } 143 if( p->iSize<=BITVEC_NBIT ){ 144 return (p->u.aBitmap[i/BITVEC_SZELEM] & (1<<(i&(BITVEC_SZELEM-1))))!=0; 145 } else{ 146 u32 h = BITVEC_HASH(i++); 147 while( p->u.aHash[h] ){ 148 if( p->u.aHash[h]==i ) return 1; 149 h++; 150 if( h>=BITVEC_NINT ) h = 0; 151 } 152 return 0; 153 } 154 } 155 156 /* 157 ** Set the i-th bit. Return 0 on success and an error code if 158 ** anything goes wrong. 159 ** 160 ** This routine might cause sub-bitmaps to be allocated. Failing 161 ** to get the memory needed to hold the sub-bitmap is the only 162 ** that can go wrong with an insert, assuming p and i are valid. 163 ** 164 ** The calling function must ensure that p is a valid Bitvec object 165 ** and that the value for "i" is within range of the Bitvec object. 166 ** Otherwise the behavior is undefined. 167 */ 168 int sqlite3BitvecSet(Bitvec *p, u32 i){ 169 u32 h; 170 assert( p!=0 ); 171 assert( i>0 ); 172 assert( i<=p->iSize ); 173 i--; 174 while((p->iSize > BITVEC_NBIT) && p->iDivisor) { 175 u32 bin = i/p->iDivisor; 176 i = i%p->iDivisor; 177 if( p->u.apSub[bin]==0 ){ 178 p->u.apSub[bin] = sqlite3BitvecCreate( p->iDivisor ); 179 if( p->u.apSub[bin]==0 ) return SQLITE_NOMEM; 180 } 181 p = p->u.apSub[bin]; 182 } 183 if( p->iSize<=BITVEC_NBIT ){ 184 p->u.aBitmap[i/BITVEC_SZELEM] |= 1 << (i&(BITVEC_SZELEM-1)); 185 return SQLITE_OK; 186 } 187 h = BITVEC_HASH(i++); 188 /* if there wasn't a hash collision, and this doesn't */ 189 /* completely fill the hash, then just add it without */ 190 /* worring about sub-dividing and re-hashing. */ 191 if( !p->u.aHash[h] ){ 192 if (p->nSet<(BITVEC_NINT-1)) { 193 goto bitvec_set_end; 194 } else { 195 goto bitvec_set_rehash; 196 } 197 } 198 /* there was a collision, check to see if it's already */ 199 /* in hash, if not, try to find a spot for it */ 200 do { 201 if( p->u.aHash[h]==i ) return SQLITE_OK; 202 h++; 203 if( h>=BITVEC_NINT ) h = 0; 204 } while( p->u.aHash[h] ); 205 /* we didn't find it in the hash. h points to the first */ 206 /* available free spot. check to see if this is going to */ 207 /* make our hash too "full". */ 208 bitvec_set_rehash: 209 if( p->nSet>=BITVEC_MXHASH ){ 210 unsigned int j; 211 int rc; 212 u32 aiValues[BITVEC_NINT]; 213 memcpy(aiValues, p->u.aHash, sizeof(aiValues)); 214 memset(p->u.apSub, 0, sizeof(aiValues)); 215 p->iDivisor = (p->iSize + BITVEC_NPTR - 1)/BITVEC_NPTR; 216 rc = sqlite3BitvecSet(p, i); 217 for(j=0; j<BITVEC_NINT; j++){ 218 if( aiValues[j] ) rc |= sqlite3BitvecSet(p, aiValues[j]); 219 } 220 return rc; 221 } 222 bitvec_set_end: 223 p->nSet++; 224 p->u.aHash[h] = i; 225 return SQLITE_OK; 226 } 227 228 /* 229 ** Clear the i-th bit. 230 */ 231 void sqlite3BitvecClear(Bitvec *p, u32 i){ 232 assert( p!=0 ); 233 assert( i>0 ); 234 i--; 235 while( p->iDivisor ){ 236 u32 bin = i/p->iDivisor; 237 i = i%p->iDivisor; 238 p = p->u.apSub[bin]; 239 if (!p) { 240 return; 241 } 242 } 243 if( p->iSize<=BITVEC_NBIT ){ 244 p->u.aBitmap[i/BITVEC_SZELEM] &= ~(1 << (i&(BITVEC_SZELEM-1))); 245 }else{ 246 unsigned int j; 247 u32 aiValues[BITVEC_NINT]; 248 memcpy(aiValues, p->u.aHash, sizeof(aiValues)); 249 memset(p->u.aHash, 0, sizeof(aiValues)); 250 p->nSet = 0; 251 for(j=0; j<BITVEC_NINT; j++){ 252 if( aiValues[j] && aiValues[j]!=(i+1) ){ 253 u32 h = BITVEC_HASH(aiValues[j]-1); 254 p->nSet++; 255 while( p->u.aHash[h] ){ 256 h++; 257 if( h>=BITVEC_NINT ) h = 0; 258 } 259 p->u.aHash[h] = aiValues[j]; 260 } 261 } 262 } 263 } 264 265 /* 266 ** Destroy a bitmap object. Reclaim all memory used. 267 */ 268 void sqlite3BitvecDestroy(Bitvec *p){ 269 if( p==0 ) return; 270 if( p->iDivisor ){ 271 unsigned int i; 272 for(i=0; i<BITVEC_NPTR; i++){ 273 sqlite3BitvecDestroy(p->u.apSub[i]); 274 } 275 } 276 sqlite3_free(p); 277 } 278 279 /* 280 ** Return the value of the iSize parameter specified when Bitvec *p 281 ** was created. 282 */ 283 u32 sqlite3BitvecSize(Bitvec *p){ 284 return p->iSize; 285 } 286 287 #ifndef SQLITE_OMIT_BUILTIN_TEST 288 /* 289 ** Let V[] be an array of unsigned characters sufficient to hold 290 ** up to N bits. Let I be an integer between 0 and N. 0<=I<N. 291 ** Then the following macros can be used to set, clear, or test 292 ** individual bits within V. 293 */ 294 #define SETBIT(V,I) V[I>>3] |= (1<<(I&7)) 295 #define CLEARBIT(V,I) V[I>>3] &= ~(1<<(I&7)) 296 #define TESTBIT(V,I) (V[I>>3]&(1<<(I&7)))!=0 297 298 /* 299 ** This routine runs an extensive test of the Bitvec code. 300 ** 301 ** The input is an array of integers that acts as a program 302 ** to test the Bitvec. The integers are opcodes followed 303 ** by 0, 1, or 3 operands, depending on the opcode. Another 304 ** opcode follows immediately after the last operand. 305 ** 306 ** There are 6 opcodes numbered from 0 through 5. 0 is the 307 ** "halt" opcode and causes the test to end. 308 ** 309 ** 0 Halt and return the number of errors 310 ** 1 N S X Set N bits beginning with S and incrementing by X 311 ** 2 N S X Clear N bits beginning with S and incrementing by X 312 ** 3 N Set N randomly chosen bits 313 ** 4 N Clear N randomly chosen bits 314 ** 5 N S X Set N bits from S increment X in array only, not in bitvec 315 ** 316 ** The opcodes 1 through 4 perform set and clear operations are performed 317 ** on both a Bitvec object and on a linear array of bits obtained from malloc. 318 ** Opcode 5 works on the linear array only, not on the Bitvec. 319 ** Opcode 5 is used to deliberately induce a fault in order to 320 ** confirm that error detection works. 321 ** 322 ** At the conclusion of the test the linear array is compared 323 ** against the Bitvec object. If there are any differences, 324 ** an error is returned. If they are the same, zero is returned. 325 ** 326 ** If a memory allocation error occurs, return -1. 327 */ 328 int sqlite3BitvecBuiltinTest(int sz, int *aOp){ 329 Bitvec *pBitvec = 0; 330 unsigned char *pV = 0; 331 int rc = -1; 332 int i, nx, pc, op; 333 334 /* Allocate the Bitvec to be tested and a linear array of 335 ** bits to act as the reference */ 336 pBitvec = sqlite3BitvecCreate( sz ); 337 pV = sqlite3_malloc( (sz+7)/8 + 1 ); 338 if( pBitvec==0 || pV==0 ) goto bitvec_end; 339 memset(pV, 0, (sz+7)/8 + 1); 340 341 /* Run the program */ 342 pc = 0; 343 while( (op = aOp[pc])!=0 ){ 344 switch( op ){ 345 case 1: 346 case 2: 347 case 5: { 348 nx = 4; 349 i = aOp[pc+2] - 1; 350 aOp[pc+2] += aOp[pc+3]; 351 break; 352 } 353 case 3: 354 case 4: 355 default: { 356 nx = 2; 357 sqlite3_randomness(sizeof(i), &i); 358 break; 359 } 360 } 361 if( (--aOp[pc+1]) > 0 ) nx = 0; 362 pc += nx; 363 i = (i & 0x7fffffff)%sz; 364 if( (op & 1)!=0 ){ 365 SETBIT(pV, (i+1)); 366 if( op!=5 ){ 367 if( sqlite3BitvecSet(pBitvec, i+1) ) goto bitvec_end; 368 } 369 }else{ 370 CLEARBIT(pV, (i+1)); 371 sqlite3BitvecClear(pBitvec, i+1); 372 } 373 } 374 375 /* Test to make sure the linear array exactly matches the 376 ** Bitvec object. Start with the assumption that they do 377 ** match (rc==0). Change rc to non-zero if a discrepancy 378 ** is found. 379 */ 380 rc = sqlite3BitvecTest(0,0) + sqlite3BitvecTest(pBitvec, sz+1) 381 + sqlite3BitvecTest(pBitvec, 0) 382 + (sqlite3BitvecSize(pBitvec) - sz); 383 for(i=1; i<=sz; i++){ 384 if( (TESTBIT(pV,i))!=sqlite3BitvecTest(pBitvec,i) ){ 385 rc = i; 386 break; 387 } 388 } 389 390 /* Free allocated structure */ 391 bitvec_end: 392 sqlite3_free(pV); 393 sqlite3BitvecDestroy(pBitvec); 394 return rc; 395 } 396 #endif /* SQLITE_OMIT_BUILTIN_TEST */ 397