1 /* 2 ** 2016-06-07 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** 13 ** This is a utility program that computes an SHA1 hash on the content 14 ** of an SQLite database. 15 ** 16 ** The hash is computed over just the content of the database. Free 17 ** space inside of the database file, and alternative on-disk representations 18 ** of the same content (ex: UTF8 vs UTF16) do not affect the hash. So, 19 ** for example, the database file page size, encoding, and auto_vacuum setting 20 ** can all be changed without changing the hash. 21 */ 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <stdarg.h> 25 #include <ctype.h> 26 #include <string.h> 27 #include <assert.h> 28 #include "sqlite3.h" 29 30 /* Context for the SHA1 hash */ 31 typedef struct SHA1Context SHA1Context; 32 struct SHA1Context { 33 unsigned int state[5]; 34 unsigned int count[2]; 35 unsigned char buffer[64]; 36 }; 37 38 /* 39 ** All global variables are gathered into the "g" singleton. 40 */ 41 struct GlobalVars { 42 const char *zArgv0; /* Name of program */ 43 unsigned fDebug; /* Debug flags */ 44 sqlite3 *db; /* The database connection */ 45 SHA1Context cx; /* SHA1 hash context */ 46 } g; 47 48 /* 49 ** Debugging flags 50 */ 51 #define DEBUG_FULLTRACE 0x00000001 /* Trace hash to stderr */ 52 53 /****************************************************************************** 54 ** The Hash Engine 55 ** 56 ** Modify these routines (and appropriate state fields in global variable 'g') 57 ** in order to compute a different (better?) hash of the database. 58 */ 59 /* 60 * blk0() and blk() perform the initial expand. 61 * I got the idea of expanding during the round function from SSLeay 62 * 63 * blk0le() for little-endian and blk0be() for big-endian. 64 */ 65 #if __GNUC__ && (defined(__i386__) || defined(__x86_64__)) 66 /* 67 * GCC by itself only generates left rotates. Use right rotates if 68 * possible to be kinder to dinky implementations with iterative rotate 69 * instructions. 70 */ 71 #define SHA_ROT(op, x, k) \ 72 ({ unsigned int y; asm(op " %1,%0" : "=r" (y) : "I" (k), "0" (x)); y; }) 73 #define rol(x,k) SHA_ROT("roll", x, k) 74 #define ror(x,k) SHA_ROT("rorl", x, k) 75 76 #else 77 /* Generic C equivalent */ 78 #define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r)) 79 #define rol(x,k) SHA_ROT(x,k,32-(k)) 80 #define ror(x,k) SHA_ROT(x,32-(k),k) 81 #endif 82 83 84 #define blk0le(i) (block[i] = (ror(block[i],8)&0xFF00FF00) \ 85 |(rol(block[i],8)&0x00FF00FF)) 86 #define blk0be(i) block[i] 87 #define blk(i) (block[i&15] = rol(block[(i+13)&15]^block[(i+8)&15] \ 88 ^block[(i+2)&15]^block[i&15],1)) 89 90 /* 91 * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1 92 * 93 * Rl0() for little-endian and Rb0() for big-endian. Endianness is 94 * determined at run-time. 95 */ 96 #define Rl0(v,w,x,y,z,i) \ 97 z+=((w&(x^y))^y)+blk0le(i)+0x5A827999+rol(v,5);w=ror(w,2); 98 #define Rb0(v,w,x,y,z,i) \ 99 z+=((w&(x^y))^y)+blk0be(i)+0x5A827999+rol(v,5);w=ror(w,2); 100 #define R1(v,w,x,y,z,i) \ 101 z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=ror(w,2); 102 #define R2(v,w,x,y,z,i) \ 103 z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=ror(w,2); 104 #define R3(v,w,x,y,z,i) \ 105 z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=ror(w,2); 106 #define R4(v,w,x,y,z,i) \ 107 z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=ror(w,2); 108 109 /* 110 * Hash a single 512-bit block. This is the core of the algorithm. 111 */ 112 #define a qq[0] 113 #define b qq[1] 114 #define c qq[2] 115 #define d qq[3] 116 #define e qq[4] 117 118 void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){ 119 unsigned int qq[5]; /* a, b, c, d, e; */ 120 static int one = 1; 121 unsigned int block[16]; 122 memcpy(block, buffer, 64); 123 memcpy(qq,state,5*sizeof(unsigned int)); 124 125 /* Copy g.cx.state[] to working vars */ 126 /* 127 a = state[0]; 128 b = state[1]; 129 c = state[2]; 130 d = state[3]; 131 e = state[4]; 132 */ 133 134 /* 4 rounds of 20 operations each. Loop unrolled. */ 135 if( 1 == *(unsigned char*)&one ){ 136 Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3); 137 Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7); 138 Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11); 139 Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15); 140 }else{ 141 Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3); 142 Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7); 143 Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11); 144 Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15); 145 } 146 R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19); 147 R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23); 148 R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27); 149 R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31); 150 R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35); 151 R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39); 152 R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43); 153 R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47); 154 R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51); 155 R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55); 156 R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59); 157 R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63); 158 R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67); 159 R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71); 160 R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75); 161 R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79); 162 163 /* Add the working vars back into context.state[] */ 164 state[0] += a; 165 state[1] += b; 166 state[2] += c; 167 state[3] += d; 168 state[4] += e; 169 } 170 171 172 /* Initialize the SHA1 hash */ 173 static void hash_init(void){ 174 /* SHA1 initialization constants */ 175 g.cx.state[0] = 0x67452301; 176 g.cx.state[1] = 0xEFCDAB89; 177 g.cx.state[2] = 0x98BADCFE; 178 g.cx.state[3] = 0x10325476; 179 g.cx.state[4] = 0xC3D2E1F0; 180 g.cx.count[0] = g.cx.count[1] = 0; 181 } 182 183 /* Add new content to the SHA1 hash */ 184 static void hash_step(const unsigned char *data, unsigned int len){ 185 unsigned int i, j; 186 187 j = g.cx.count[0]; 188 if( (g.cx.count[0] += len << 3) < j ){ 189 g.cx.count[1] += (len>>29)+1; 190 } 191 j = (j >> 3) & 63; 192 if( (j + len) > 63 ){ 193 (void)memcpy(&g.cx.buffer[j], data, (i = 64-j)); 194 SHA1Transform(g.cx.state, g.cx.buffer); 195 for(; i + 63 < len; i += 64){ 196 SHA1Transform(g.cx.state, &data[i]); 197 } 198 j = 0; 199 }else{ 200 i = 0; 201 } 202 (void)memcpy(&g.cx.buffer[j], &data[i], len - i); 203 } 204 205 206 /* Add padding and compute and output the message digest. */ 207 static void hash_finish(const char *zName){ 208 unsigned int i; 209 unsigned char finalcount[8]; 210 unsigned char digest[20]; 211 static const char zEncode[] = "0123456789abcdef"; 212 char zOut[41]; 213 214 for (i = 0; i < 8; i++){ 215 finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)] 216 >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */ 217 } 218 hash_step((const unsigned char *)"\200", 1); 219 while ((g.cx.count[0] & 504) != 448){ 220 hash_step((const unsigned char *)"\0", 1); 221 } 222 hash_step(finalcount, 8); /* Should cause a SHA1Transform() */ 223 for (i = 0; i < 20; i++){ 224 digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255); 225 } 226 for(i=0; i<20; i++){ 227 zOut[i*2] = zEncode[(digest[i]>>4)&0xf]; 228 zOut[i*2+1] = zEncode[digest[i] & 0xf]; 229 } 230 zOut[i*2]= 0; 231 printf("%s %s\n", zOut, zName); 232 } 233 /* End of the hashing logic 234 *******************************************************************************/ 235 236 /* 237 ** Print an error resulting from faulting command-line arguments and 238 ** abort the program. 239 */ 240 static void cmdlineError(const char *zFormat, ...){ 241 va_list ap; 242 fprintf(stderr, "%s: ", g.zArgv0); 243 va_start(ap, zFormat); 244 vfprintf(stderr, zFormat, ap); 245 va_end(ap); 246 fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0); 247 exit(1); 248 } 249 250 /* 251 ** Print an error message for an error that occurs at runtime, then 252 ** abort the program. 253 */ 254 static void runtimeError(const char *zFormat, ...){ 255 va_list ap; 256 fprintf(stderr, "%s: ", g.zArgv0); 257 va_start(ap, zFormat); 258 vfprintf(stderr, zFormat, ap); 259 va_end(ap); 260 fprintf(stderr, "\n"); 261 exit(1); 262 } 263 264 /* 265 ** Prepare a new SQL statement. Print an error and abort if anything 266 ** goes wrong. 267 */ 268 static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){ 269 char *zSql; 270 int rc; 271 sqlite3_stmt *pStmt; 272 273 zSql = sqlite3_vmprintf(zFormat, ap); 274 if( zSql==0 ) runtimeError("out of memory"); 275 rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0); 276 if( rc ){ 277 runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db), 278 zSql); 279 } 280 sqlite3_free(zSql); 281 return pStmt; 282 } 283 static sqlite3_stmt *db_prepare(const char *zFormat, ...){ 284 va_list ap; 285 sqlite3_stmt *pStmt; 286 va_start(ap, zFormat); 287 pStmt = db_vprepare(zFormat, ap); 288 va_end(ap); 289 return pStmt; 290 } 291 292 /* 293 ** Compute the hash for all rows of the query formed from the printf-style 294 ** zFormat and its argument. 295 */ 296 static void hash_one_query(const char *zFormat, ...){ 297 va_list ap; 298 sqlite3_stmt *pStmt; /* The query defined by zFormat and "..." */ 299 int nCol; /* Number of columns in the result set */ 300 int i; /* Loop counter */ 301 302 /* Prepare the query defined by zFormat and "..." */ 303 va_start(ap, zFormat); 304 pStmt = db_vprepare(zFormat, ap); 305 va_end(ap); 306 nCol = sqlite3_column_count(pStmt); 307 308 /* Compute a hash over the result of the query */ 309 while( SQLITE_ROW==sqlite3_step(pStmt) ){ 310 for(i=0; i<nCol; i++){ 311 switch( sqlite3_column_type(pStmt,i) ){ 312 case SQLITE_NULL: { 313 hash_step((const unsigned char*)"0",1); 314 if( g.fDebug & DEBUG_FULLTRACE ) fprintf(stderr, "NULL\n"); 315 break; 316 } 317 case SQLITE_INTEGER: { 318 sqlite3_uint64 u; 319 int j; 320 unsigned char x[8]; 321 sqlite3_int64 v = sqlite3_column_int64(pStmt,i); 322 memcpy(&u, &v, 8); 323 for(j=7; j>=0; j--){ 324 x[j] = u & 0xff; 325 u >>= 8; 326 } 327 hash_step((const unsigned char*)"1",1); 328 hash_step(x,8); 329 if( g.fDebug & DEBUG_FULLTRACE ){ 330 fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i)); 331 } 332 break; 333 } 334 case SQLITE_FLOAT: { 335 sqlite3_uint64 u; 336 int j; 337 unsigned char x[8]; 338 double r = sqlite3_column_double(pStmt,i); 339 memcpy(&u, &r, 8); 340 for(j=7; j>=0; j--){ 341 x[j] = u & 0xff; 342 u >>= 8; 343 } 344 hash_step((const unsigned char*)"2",1); 345 hash_step(x,8); 346 if( g.fDebug & DEBUG_FULLTRACE ){ 347 fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i)); 348 } 349 break; 350 } 351 case SQLITE_TEXT: { 352 int n = sqlite3_column_bytes(pStmt, i); 353 const unsigned char *z = sqlite3_column_text(pStmt, i); 354 hash_step((const unsigned char*)"3", 1); 355 hash_step(z, n); 356 if( g.fDebug & DEBUG_FULLTRACE ){ 357 fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i)); 358 } 359 break; 360 } 361 case SQLITE_BLOB: { 362 int n = sqlite3_column_bytes(pStmt, i); 363 const unsigned char *z = sqlite3_column_blob(pStmt, i); 364 hash_step((const unsigned char*)"4", 1); 365 hash_step(z, n); 366 if( g.fDebug & DEBUG_FULLTRACE ){ 367 fprintf(stderr, "BLOB (%d bytes)\n", n); 368 } 369 break; 370 } 371 } 372 } 373 } 374 sqlite3_finalize(pStmt); 375 } 376 377 378 /* 379 ** Print sketchy documentation for this utility program 380 */ 381 static void showHelp(void){ 382 printf("Usage: %s [options] FILE ...\n", g.zArgv0); 383 printf( 384 "Compute a SHA1 hash on the content of database FILE. System tables such as\n" 385 "sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n" 386 "Options:\n" 387 " --debug N Set debugging flags to N (experts only)\n" 388 " --like PATTERN Only hash tables whose name is LIKE the pattern\n" 389 " --schema-only Only hash the schema - omit table content\n" 390 " --without-schema Only hash table content - omit the schema\n" 391 ); 392 } 393 394 int main(int argc, char **argv){ 395 const char *zDb = 0; /* Name of the database currently being hashed */ 396 int i; /* Loop counter */ 397 int rc; /* Subroutine return code */ 398 char *zErrMsg; /* Error message when opening database */ 399 sqlite3_stmt *pStmt; /* An SQLite query */ 400 const char *zLike = 0; /* LIKE pattern of tables to hash */ 401 int omitSchema = 0; /* True to compute hash on content only */ 402 int omitContent = 0; /* True to compute hash on schema only */ 403 int nFile = 0; /* Number of input filenames seen */ 404 405 g.zArgv0 = argv[0]; 406 sqlite3_config(SQLITE_CONFIG_SINGLETHREAD); 407 for(i=1; i<argc; i++){ 408 const char *z = argv[i]; 409 if( z[0]=='-' ){ 410 z++; 411 if( z[0]=='-' ) z++; 412 if( strcmp(z,"debug")==0 ){ 413 if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]); 414 g.fDebug = strtol(argv[++i], 0, 0); 415 }else 416 if( strcmp(z,"help")==0 ){ 417 showHelp(); 418 return 0; 419 }else 420 if( strcmp(z,"like")==0 ){ 421 if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]); 422 if( zLike!=0 ) cmdlineError("only one --like allowed"); 423 zLike = argv[++i]; 424 }else 425 if( strcmp(z,"schema-only")==0 ){ 426 omitContent = 1; 427 }else 428 if( strcmp(z,"without-schema")==0 ){ 429 omitSchema = 1; 430 }else 431 { 432 cmdlineError("unknown option: %s", argv[i]); 433 } 434 }else{ 435 nFile++; 436 if( nFile<i ) argv[nFile] = argv[i]; 437 } 438 } 439 if( nFile==0 ){ 440 cmdlineError("no input files specified - nothing to do"); 441 } 442 if( omitSchema && omitContent ){ 443 cmdlineError("only one of --without-schema and --omit-schema allowed"); 444 } 445 if( zLike==0 ) zLike = "%"; 446 447 for(i=1; i<=nFile; i++){ 448 static const int openFlags = 449 SQLITE_OPEN_READWRITE | /* Read/write so hot journals can recover */ 450 SQLITE_OPEN_URI 451 ; 452 zDb = argv[i]; 453 rc = sqlite3_open_v2(zDb, &g.db, openFlags, 0); 454 if( rc ){ 455 fprintf(stderr, "cannot open database file '%s'\n", zDb); 456 continue; 457 } 458 rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, &zErrMsg); 459 if( rc || zErrMsg ){ 460 sqlite3_close(g.db); 461 g.db = 0; 462 fprintf(stderr, "'%s' is not a valid SQLite database\n", zDb); 463 continue; 464 } 465 466 /* Start the hash */ 467 hash_init(); 468 469 /* Hash table content */ 470 if( !omitContent ){ 471 pStmt = db_prepare( 472 "SELECT name FROM sqlite_master\n" 473 " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n" 474 " AND name NOT LIKE 'sqlite_%%'\n" 475 " AND name LIKE '%q'\n" 476 " ORDER BY name COLLATE nocase;\n", 477 zLike 478 ); 479 while( SQLITE_ROW==sqlite3_step(pStmt) ){ 480 /* We want rows of the table to be hashed in PRIMARY KEY order. 481 ** Technically, an ORDER BY clause is required to guarantee that 482 ** order. However, though not guaranteed by the documentation, every 483 ** historical version of SQLite has always output rows in PRIMARY KEY 484 ** order when there is no WHERE or GROUP BY clause, so the ORDER BY 485 ** can be safely omitted. */ 486 hash_one_query("SELECT * FROM \"%w\"", sqlite3_column_text(pStmt,0)); 487 } 488 sqlite3_finalize(pStmt); 489 } 490 491 /* Hash the database schema */ 492 if( !omitSchema ){ 493 hash_one_query( 494 "SELECT type, name, tbl_name, sql FROM sqlite_master\n" 495 " WHERE tbl_name LIKE '%q'\n" 496 " ORDER BY name COLLATE nocase;\n", 497 zLike 498 ); 499 } 500 501 /* Finish and output the hash and close the database connection. */ 502 hash_finish(zDb); 503 sqlite3_close(g.db); 504 } 505 return 0; 506 } 507