xref: /sqlite-3.40.0/tool/dbhash.c (revision dfe4e6bb)
1 /*
2 ** 2016-06-07
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This is a utility program that computes an SHA1 hash on the content
14 ** of an SQLite database.
15 **
16 ** The hash is computed over just the content of the database.  Free
17 ** space inside of the database file, and alternative on-disk representations
18 ** of the same content (ex: UTF8 vs UTF16) do not affect the hash.  So,
19 ** for example, the database file page size, encoding, and auto_vacuum setting
20 ** can all be changed without changing the hash.
21 */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdarg.h>
25 #include <ctype.h>
26 #include <string.h>
27 #include <assert.h>
28 #include "sqlite3.h"
29 
30 /* Context for the SHA1 hash */
31 typedef struct SHA1Context SHA1Context;
32 struct SHA1Context {
33   unsigned int state[5];
34   unsigned int count[2];
35   unsigned char buffer[64];
36 };
37 
38 /*
39 ** All global variables are gathered into the "g" singleton.
40 */
41 struct GlobalVars {
42   const char *zArgv0;       /* Name of program */
43   unsigned fDebug;          /* Debug flags */
44   sqlite3 *db;              /* The database connection */
45   SHA1Context cx;           /* SHA1 hash context */
46 } g;
47 
48 /*
49 ** Debugging flags
50 */
51 #define DEBUG_FULLTRACE   0x00000001   /* Trace hash to stderr */
52 
53 /******************************************************************************
54 ** The Hash Engine
55 **
56 ** Modify these routines (and appropriate state fields in global variable 'g')
57 ** in order to compute a different (better?) hash of the database.
58 */
59 /*
60  * blk0() and blk() perform the initial expand.
61  * I got the idea of expanding during the round function from SSLeay
62  *
63  * blk0le() for little-endian and blk0be() for big-endian.
64  */
65 #if __GNUC__ && (defined(__i386__) || defined(__x86_64__))
66 /*
67  * GCC by itself only generates left rotates.  Use right rotates if
68  * possible to be kinder to dinky implementations with iterative rotate
69  * instructions.
70  */
71 #define SHA_ROT(op, x, k) \
72         ({ unsigned int y; asm(op " %1,%0" : "=r" (y) : "I" (k), "0" (x)); y; })
73 #define rol(x,k) SHA_ROT("roll", x, k)
74 #define ror(x,k) SHA_ROT("rorl", x, k)
75 
76 #else
77 /* Generic C equivalent */
78 #define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r))
79 #define rol(x,k) SHA_ROT(x,k,32-(k))
80 #define ror(x,k) SHA_ROT(x,32-(k),k)
81 #endif
82 
83 
84 #define blk0le(i) (block[i] = (ror(block[i],8)&0xFF00FF00) \
85     |(rol(block[i],8)&0x00FF00FF))
86 #define blk0be(i) block[i]
87 #define blk(i) (block[i&15] = rol(block[(i+13)&15]^block[(i+8)&15] \
88     ^block[(i+2)&15]^block[i&15],1))
89 
90 /*
91  * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1
92  *
93  * Rl0() for little-endian and Rb0() for big-endian.  Endianness is
94  * determined at run-time.
95  */
96 #define Rl0(v,w,x,y,z,i) \
97     z+=((w&(x^y))^y)+blk0le(i)+0x5A827999+rol(v,5);w=ror(w,2);
98 #define Rb0(v,w,x,y,z,i) \
99     z+=((w&(x^y))^y)+blk0be(i)+0x5A827999+rol(v,5);w=ror(w,2);
100 #define R1(v,w,x,y,z,i) \
101     z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=ror(w,2);
102 #define R2(v,w,x,y,z,i) \
103     z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=ror(w,2);
104 #define R3(v,w,x,y,z,i) \
105     z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=ror(w,2);
106 #define R4(v,w,x,y,z,i) \
107     z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=ror(w,2);
108 
109 /*
110  * Hash a single 512-bit block. This is the core of the algorithm.
111  */
112 #define a qq[0]
113 #define b qq[1]
114 #define c qq[2]
115 #define d qq[3]
116 #define e qq[4]
117 
118 void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){
119   unsigned int qq[5]; /* a, b, c, d, e; */
120   static int one = 1;
121   unsigned int block[16];
122   memcpy(block, buffer, 64);
123   memcpy(qq,state,5*sizeof(unsigned int));
124 
125   /* Copy g.cx.state[] to working vars */
126   /*
127   a = state[0];
128   b = state[1];
129   c = state[2];
130   d = state[3];
131   e = state[4];
132   */
133 
134   /* 4 rounds of 20 operations each. Loop unrolled. */
135   if( 1 == *(unsigned char*)&one ){
136     Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3);
137     Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7);
138     Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11);
139     Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15);
140   }else{
141     Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3);
142     Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7);
143     Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11);
144     Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15);
145   }
146   R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
147   R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
148   R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
149   R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
150   R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
151   R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
152   R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
153   R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
154   R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
155   R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
156   R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
157   R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
158   R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
159   R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
160   R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
161   R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
162 
163   /* Add the working vars back into context.state[] */
164   state[0] += a;
165   state[1] += b;
166   state[2] += c;
167   state[3] += d;
168   state[4] += e;
169 }
170 
171 
172 /* Initialize the SHA1 hash */
173 static void hash_init(void){
174   /* SHA1 initialization constants */
175   g.cx.state[0] = 0x67452301;
176   g.cx.state[1] = 0xEFCDAB89;
177   g.cx.state[2] = 0x98BADCFE;
178   g.cx.state[3] = 0x10325476;
179   g.cx.state[4] = 0xC3D2E1F0;
180   g.cx.count[0] = g.cx.count[1] = 0;
181 }
182 
183 /* Add new content to the SHA1 hash */
184 static void hash_step(const unsigned char *data,  unsigned int len){
185   unsigned int i, j;
186 
187   j = g.cx.count[0];
188   if( (g.cx.count[0] += len << 3) < j ){
189     g.cx.count[1] += (len>>29)+1;
190   }
191   j = (j >> 3) & 63;
192   if( (j + len) > 63 ){
193     (void)memcpy(&g.cx.buffer[j], data, (i = 64-j));
194     SHA1Transform(g.cx.state, g.cx.buffer);
195     for(; i + 63 < len; i += 64){
196       SHA1Transform(g.cx.state, &data[i]);
197     }
198     j = 0;
199   }else{
200     i = 0;
201   }
202   (void)memcpy(&g.cx.buffer[j], &data[i], len - i);
203 }
204 
205 
206 /* Add padding and compute and output the message digest. */
207 static void hash_finish(const char *zName){
208   unsigned int i;
209   unsigned char finalcount[8];
210   unsigned char digest[20];
211   static const char zEncode[] = "0123456789abcdef";
212   char zOut[41];
213 
214   for (i = 0; i < 8; i++){
215     finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)]
216        >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
217   }
218   hash_step((const unsigned char *)"\200", 1);
219   while ((g.cx.count[0] & 504) != 448){
220     hash_step((const unsigned char *)"\0", 1);
221   }
222   hash_step(finalcount, 8);  /* Should cause a SHA1Transform() */
223   for (i = 0; i < 20; i++){
224     digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
225   }
226   for(i=0; i<20; i++){
227     zOut[i*2] = zEncode[(digest[i]>>4)&0xf];
228     zOut[i*2+1] = zEncode[digest[i] & 0xf];
229   }
230   zOut[i*2]= 0;
231   printf("%s %s\n", zOut, zName);
232 }
233 /* End of the hashing logic
234 *******************************************************************************/
235 
236 /*
237 ** Print an error resulting from faulting command-line arguments and
238 ** abort the program.
239 */
240 static void cmdlineError(const char *zFormat, ...){
241   va_list ap;
242   fprintf(stderr, "%s: ", g.zArgv0);
243   va_start(ap, zFormat);
244   vfprintf(stderr, zFormat, ap);
245   va_end(ap);
246   fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0);
247   exit(1);
248 }
249 
250 /*
251 ** Print an error message for an error that occurs at runtime, then
252 ** abort the program.
253 */
254 static void runtimeError(const char *zFormat, ...){
255   va_list ap;
256   fprintf(stderr, "%s: ", g.zArgv0);
257   va_start(ap, zFormat);
258   vfprintf(stderr, zFormat, ap);
259   va_end(ap);
260   fprintf(stderr, "\n");
261   exit(1);
262 }
263 
264 /*
265 ** Prepare a new SQL statement.  Print an error and abort if anything
266 ** goes wrong.
267 */
268 static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){
269   char *zSql;
270   int rc;
271   sqlite3_stmt *pStmt;
272 
273   zSql = sqlite3_vmprintf(zFormat, ap);
274   if( zSql==0 ) runtimeError("out of memory");
275   rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0);
276   if( rc ){
277     runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db),
278                  zSql);
279   }
280   sqlite3_free(zSql);
281   return pStmt;
282 }
283 static sqlite3_stmt *db_prepare(const char *zFormat, ...){
284   va_list ap;
285   sqlite3_stmt *pStmt;
286   va_start(ap, zFormat);
287   pStmt = db_vprepare(zFormat, ap);
288   va_end(ap);
289   return pStmt;
290 }
291 
292 /*
293 ** Compute the hash for all rows of the query formed from the printf-style
294 ** zFormat and its argument.
295 */
296 static void hash_one_query(const char *zFormat, ...){
297   va_list ap;
298   sqlite3_stmt *pStmt;        /* The query defined by zFormat and "..." */
299   int nCol;                   /* Number of columns in the result set */
300   int i;                      /* Loop counter */
301 
302   /* Prepare the query defined by zFormat and "..." */
303   va_start(ap, zFormat);
304   pStmt = db_vprepare(zFormat, ap);
305   va_end(ap);
306   nCol = sqlite3_column_count(pStmt);
307 
308   /* Compute a hash over the result of the query */
309   while( SQLITE_ROW==sqlite3_step(pStmt) ){
310     for(i=0; i<nCol; i++){
311       switch( sqlite3_column_type(pStmt,i) ){
312         case SQLITE_NULL: {
313           hash_step((const unsigned char*)"0",1);
314           if( g.fDebug & DEBUG_FULLTRACE ) fprintf(stderr, "NULL\n");
315           break;
316         }
317         case SQLITE_INTEGER: {
318           sqlite3_uint64 u;
319           int j;
320           unsigned char x[8];
321           sqlite3_int64 v = sqlite3_column_int64(pStmt,i);
322           memcpy(&u, &v, 8);
323           for(j=7; j>=0; j--){
324             x[j] = u & 0xff;
325             u >>= 8;
326           }
327           hash_step((const unsigned char*)"1",1);
328           hash_step(x,8);
329           if( g.fDebug & DEBUG_FULLTRACE ){
330             fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i));
331           }
332           break;
333         }
334         case SQLITE_FLOAT: {
335           sqlite3_uint64 u;
336           int j;
337           unsigned char x[8];
338           double r = sqlite3_column_double(pStmt,i);
339           memcpy(&u, &r, 8);
340           for(j=7; j>=0; j--){
341             x[j] = u & 0xff;
342             u >>= 8;
343           }
344           hash_step((const unsigned char*)"2",1);
345           hash_step(x,8);
346           if( g.fDebug & DEBUG_FULLTRACE ){
347             fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i));
348           }
349           break;
350         }
351         case SQLITE_TEXT: {
352           int n = sqlite3_column_bytes(pStmt, i);
353           const unsigned char *z = sqlite3_column_text(pStmt, i);
354           hash_step((const unsigned char*)"3", 1);
355           hash_step(z, n);
356           if( g.fDebug & DEBUG_FULLTRACE ){
357             fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i));
358           }
359           break;
360         }
361         case SQLITE_BLOB: {
362           int n = sqlite3_column_bytes(pStmt, i);
363           const unsigned char *z = sqlite3_column_blob(pStmt, i);
364           hash_step((const unsigned char*)"4", 1);
365           hash_step(z, n);
366           if( g.fDebug & DEBUG_FULLTRACE ){
367             fprintf(stderr, "BLOB (%d bytes)\n", n);
368           }
369           break;
370         }
371       }
372     }
373   }
374   sqlite3_finalize(pStmt);
375 }
376 
377 
378 /*
379 ** Print sketchy documentation for this utility program
380 */
381 static void showHelp(void){
382   printf("Usage: %s [options] FILE ...\n", g.zArgv0);
383   printf(
384 "Compute a SHA1 hash on the content of database FILE.  System tables such as\n"
385 "sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n"
386 "Options:\n"
387 "   --debug N           Set debugging flags to N (experts only)\n"
388 "   --like PATTERN      Only hash tables whose name is LIKE the pattern\n"
389 "   --schema-only       Only hash the schema - omit table content\n"
390 "   --without-schema    Only hash table content - omit the schema\n"
391   );
392 }
393 
394 int main(int argc, char **argv){
395   const char *zDb = 0;         /* Name of the database currently being hashed */
396   int i;                       /* Loop counter */
397   int rc;                      /* Subroutine return code */
398   char *zErrMsg;               /* Error message when opening database */
399   sqlite3_stmt *pStmt;         /* An SQLite query */
400   const char *zLike = 0;       /* LIKE pattern of tables to hash */
401   int omitSchema = 0;          /* True to compute hash on content only */
402   int omitContent = 0;         /* True to compute hash on schema only */
403   int nFile = 0;               /* Number of input filenames seen */
404 
405   g.zArgv0 = argv[0];
406   sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
407   for(i=1; i<argc; i++){
408     const char *z = argv[i];
409     if( z[0]=='-' ){
410       z++;
411       if( z[0]=='-' ) z++;
412       if( strcmp(z,"debug")==0 ){
413         if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
414         g.fDebug = strtol(argv[++i], 0, 0);
415       }else
416       if( strcmp(z,"help")==0 ){
417         showHelp();
418         return 0;
419       }else
420       if( strcmp(z,"like")==0 ){
421         if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
422         if( zLike!=0 ) cmdlineError("only one --like allowed");
423         zLike = argv[++i];
424       }else
425       if( strcmp(z,"schema-only")==0 ){
426         omitContent = 1;
427       }else
428       if( strcmp(z,"without-schema")==0 ){
429         omitSchema = 1;
430       }else
431       {
432         cmdlineError("unknown option: %s", argv[i]);
433       }
434     }else{
435       nFile++;
436       if( nFile<i ) argv[nFile] = argv[i];
437     }
438   }
439   if( nFile==0 ){
440     cmdlineError("no input files specified - nothing to do");
441   }
442   if( omitSchema && omitContent ){
443     cmdlineError("only one of --without-schema and --omit-schema allowed");
444   }
445   if( zLike==0 ) zLike = "%";
446 
447   for(i=1; i<=nFile; i++){
448     static const int openFlags =
449        SQLITE_OPEN_READWRITE |     /* Read/write so hot journals can recover */
450        SQLITE_OPEN_URI
451     ;
452     zDb = argv[i];
453     rc = sqlite3_open_v2(zDb, &g.db, openFlags, 0);
454     if( rc ){
455       fprintf(stderr, "cannot open database file '%s'\n", zDb);
456       continue;
457     }
458     rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, &zErrMsg);
459     if( rc || zErrMsg ){
460       sqlite3_close(g.db);
461       g.db = 0;
462       fprintf(stderr, "'%s' is not a valid SQLite database\n", zDb);
463       continue;
464     }
465 
466     /* Start the hash */
467     hash_init();
468 
469     /* Hash table content */
470     if( !omitContent ){
471       pStmt = db_prepare(
472         "SELECT name FROM sqlite_master\n"
473         " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
474         "   AND name NOT LIKE 'sqlite_%%'\n"
475         "   AND name LIKE '%q'\n"
476         " ORDER BY name COLLATE nocase;\n",
477         zLike
478       );
479       while( SQLITE_ROW==sqlite3_step(pStmt) ){
480         /* We want rows of the table to be hashed in PRIMARY KEY order.
481         ** Technically, an ORDER BY clause is required to guarantee that
482         ** order.  However, though not guaranteed by the documentation, every
483         ** historical version of SQLite has always output rows in PRIMARY KEY
484         ** order when there is no WHERE or GROUP BY clause, so the ORDER BY
485         ** can be safely omitted. */
486         hash_one_query("SELECT * FROM \"%w\"", sqlite3_column_text(pStmt,0));
487       }
488       sqlite3_finalize(pStmt);
489     }
490 
491     /* Hash the database schema */
492     if( !omitSchema ){
493       hash_one_query(
494          "SELECT type, name, tbl_name, sql FROM sqlite_master\n"
495          " WHERE tbl_name LIKE '%q'\n"
496          " ORDER BY name COLLATE nocase;\n",
497          zLike
498       );
499     }
500 
501     /* Finish and output the hash and close the database connection. */
502     hash_finish(zDb);
503     sqlite3_close(g.db);
504   }
505   return 0;
506 }
507