xref: /sqlite-3.40.0/tool/dbhash.c (revision 067b92ba)
1 /*
2 ** 2016-06-07
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This is a utility program that computes an SHA1 hash on the content
14 ** of an SQLite database.
15 **
16 ** The hash is computed over just the content of the database.  Free
17 ** space inside of the database file, and alternative on-disk representations
18 ** of the same content (ex: UTF8 vs UTF16) do not affect the hash.  So,
19 ** for example, the database file page size, encoding, and auto_vacuum setting
20 ** can all be changed without changing the hash.
21 */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdarg.h>
25 #include <ctype.h>
26 #include <string.h>
27 #include <assert.h>
28 #include "sqlite3.h"
29 
30 /* Context for the SHA1 hash */
31 typedef struct SHA1Context SHA1Context;
32 struct SHA1Context {
33   unsigned int state[5];
34   unsigned int count[2];
35   unsigned char buffer[64];
36 };
37 
38 /*
39 ** All global variables are gathered into the "g" singleton.
40 */
41 struct GlobalVars {
42   const char *zArgv0;       /* Name of program */
43   unsigned fDebug;          /* Debug flags */
44   sqlite3 *db;              /* The database connection */
45   SHA1Context cx;           /* SHA1 hash context */
46 } g;
47 
48 /*
49 ** Debugging flags
50 */
51 #define DEBUG_FULLTRACE   0x00000001   /* Trace hash to stderr */
52 
53 /******************************************************************************
54 ** The Hash Engine
55 **
56 ** Modify these routines (and appropriate state fields in global variable 'g')
57 ** in order to compute a different (better?) hash of the database.
58 */
59 /*
60  * blk0() and blk() perform the initial expand.
61  * I got the idea of expanding during the round function from SSLeay
62  *
63  * blk0le() for little-endian and blk0be() for big-endian.
64  */
65 #define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r))
66 #define rol(x,k) SHA_ROT(x,k,32-(k))
67 #define ror(x,k) SHA_ROT(x,32-(k),k)
68 
69 #define blk0le(i) (block[i] = (ror(block[i],8)&0xFF00FF00) \
70     |(rol(block[i],8)&0x00FF00FF))
71 #define blk0be(i) block[i]
72 #define blk(i) (block[i&15] = rol(block[(i+13)&15]^block[(i+8)&15] \
73     ^block[(i+2)&15]^block[i&15],1))
74 
75 /*
76  * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1
77  *
78  * Rl0() for little-endian and Rb0() for big-endian.  Endianness is
79  * determined at run-time.
80  */
81 #define Rl0(v,w,x,y,z,i) \
82     z+=((w&(x^y))^y)+blk0le(i)+0x5A827999+rol(v,5);w=ror(w,2);
83 #define Rb0(v,w,x,y,z,i) \
84     z+=((w&(x^y))^y)+blk0be(i)+0x5A827999+rol(v,5);w=ror(w,2);
85 #define R1(v,w,x,y,z,i) \
86     z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=ror(w,2);
87 #define R2(v,w,x,y,z,i) \
88     z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=ror(w,2);
89 #define R3(v,w,x,y,z,i) \
90     z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=ror(w,2);
91 #define R4(v,w,x,y,z,i) \
92     z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=ror(w,2);
93 
94 /*
95  * Hash a single 512-bit block. This is the core of the algorithm.
96  */
97 #define a qq[0]
98 #define b qq[1]
99 #define c qq[2]
100 #define d qq[3]
101 #define e qq[4]
102 
SHA1Transform(unsigned int state[5],const unsigned char buffer[64])103 void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){
104   unsigned int qq[5]; /* a, b, c, d, e; */
105   static int one = 1;
106   unsigned int block[16];
107   memcpy(block, buffer, 64);
108   memcpy(qq,state,5*sizeof(unsigned int));
109 
110   /* Copy g.cx.state[] to working vars */
111   /*
112   a = state[0];
113   b = state[1];
114   c = state[2];
115   d = state[3];
116   e = state[4];
117   */
118 
119   /* 4 rounds of 20 operations each. Loop unrolled. */
120   if( 1 == *(unsigned char*)&one ){
121     Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3);
122     Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7);
123     Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11);
124     Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15);
125   }else{
126     Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3);
127     Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7);
128     Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11);
129     Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15);
130   }
131   R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
132   R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
133   R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
134   R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
135   R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
136   R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
137   R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
138   R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
139   R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
140   R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
141   R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
142   R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
143   R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
144   R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
145   R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
146   R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
147 
148   /* Add the working vars back into context.state[] */
149   state[0] += a;
150   state[1] += b;
151   state[2] += c;
152   state[3] += d;
153   state[4] += e;
154 }
155 
156 
157 /* Initialize the SHA1 hash */
hash_init(void)158 static void hash_init(void){
159   /* SHA1 initialization constants */
160   g.cx.state[0] = 0x67452301;
161   g.cx.state[1] = 0xEFCDAB89;
162   g.cx.state[2] = 0x98BADCFE;
163   g.cx.state[3] = 0x10325476;
164   g.cx.state[4] = 0xC3D2E1F0;
165   g.cx.count[0] = g.cx.count[1] = 0;
166 }
167 
168 /* Add new content to the SHA1 hash */
hash_step(const unsigned char * data,unsigned int len)169 static void hash_step(const unsigned char *data,  unsigned int len){
170   unsigned int i, j;
171 
172   j = g.cx.count[0];
173   if( (g.cx.count[0] += len << 3) < j ){
174     g.cx.count[1] += (len>>29)+1;
175   }
176   j = (j >> 3) & 63;
177   if( (j + len) > 63 ){
178     (void)memcpy(&g.cx.buffer[j], data, (i = 64-j));
179     SHA1Transform(g.cx.state, g.cx.buffer);
180     for(; i + 63 < len; i += 64){
181       SHA1Transform(g.cx.state, &data[i]);
182     }
183     j = 0;
184   }else{
185     i = 0;
186   }
187   (void)memcpy(&g.cx.buffer[j], &data[i], len - i);
188 }
189 
190 
191 /* Add padding and compute and output the message digest. */
hash_finish(const char * zName)192 static void hash_finish(const char *zName){
193   unsigned int i;
194   unsigned char finalcount[8];
195   unsigned char digest[20];
196   static const char zEncode[] = "0123456789abcdef";
197   char zOut[41];
198 
199   for (i = 0; i < 8; i++){
200     finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)]
201        >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
202   }
203   hash_step((const unsigned char *)"\200", 1);
204   while ((g.cx.count[0] & 504) != 448){
205     hash_step((const unsigned char *)"\0", 1);
206   }
207   hash_step(finalcount, 8);  /* Should cause a SHA1Transform() */
208   for (i = 0; i < 20; i++){
209     digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
210   }
211   for(i=0; i<20; i++){
212     zOut[i*2] = zEncode[(digest[i]>>4)&0xf];
213     zOut[i*2+1] = zEncode[digest[i] & 0xf];
214   }
215   zOut[i*2]= 0;
216   printf("%s %s\n", zOut, zName);
217 }
218 /* End of the hashing logic
219 *******************************************************************************/
220 
221 /*
222 ** Print an error resulting from faulting command-line arguments and
223 ** abort the program.
224 */
cmdlineError(const char * zFormat,...)225 static void cmdlineError(const char *zFormat, ...){
226   va_list ap;
227   fprintf(stderr, "%s: ", g.zArgv0);
228   va_start(ap, zFormat);
229   vfprintf(stderr, zFormat, ap);
230   va_end(ap);
231   fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0);
232   exit(1);
233 }
234 
235 /*
236 ** Print an error message for an error that occurs at runtime, then
237 ** abort the program.
238 */
runtimeError(const char * zFormat,...)239 static void runtimeError(const char *zFormat, ...){
240   va_list ap;
241   fprintf(stderr, "%s: ", g.zArgv0);
242   va_start(ap, zFormat);
243   vfprintf(stderr, zFormat, ap);
244   va_end(ap);
245   fprintf(stderr, "\n");
246   exit(1);
247 }
248 
249 /*
250 ** Prepare a new SQL statement.  Print an error and abort if anything
251 ** goes wrong.
252 */
db_vprepare(const char * zFormat,va_list ap)253 static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){
254   char *zSql;
255   int rc;
256   sqlite3_stmt *pStmt;
257 
258   zSql = sqlite3_vmprintf(zFormat, ap);
259   if( zSql==0 ) runtimeError("out of memory");
260   rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0);
261   if( rc ){
262     runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db),
263                  zSql);
264   }
265   sqlite3_free(zSql);
266   return pStmt;
267 }
db_prepare(const char * zFormat,...)268 static sqlite3_stmt *db_prepare(const char *zFormat, ...){
269   va_list ap;
270   sqlite3_stmt *pStmt;
271   va_start(ap, zFormat);
272   pStmt = db_vprepare(zFormat, ap);
273   va_end(ap);
274   return pStmt;
275 }
276 
277 /*
278 ** Compute the hash for all rows of the query formed from the printf-style
279 ** zFormat and its argument.
280 */
hash_one_query(const char * zFormat,...)281 static void hash_one_query(const char *zFormat, ...){
282   va_list ap;
283   sqlite3_stmt *pStmt;        /* The query defined by zFormat and "..." */
284   int nCol;                   /* Number of columns in the result set */
285   int i;                      /* Loop counter */
286 
287   /* Prepare the query defined by zFormat and "..." */
288   va_start(ap, zFormat);
289   pStmt = db_vprepare(zFormat, ap);
290   va_end(ap);
291   nCol = sqlite3_column_count(pStmt);
292 
293   /* Compute a hash over the result of the query */
294   while( SQLITE_ROW==sqlite3_step(pStmt) ){
295     for(i=0; i<nCol; i++){
296       switch( sqlite3_column_type(pStmt,i) ){
297         case SQLITE_NULL: {
298           hash_step((const unsigned char*)"0",1);
299           if( g.fDebug & DEBUG_FULLTRACE ) fprintf(stderr, "NULL\n");
300           break;
301         }
302         case SQLITE_INTEGER: {
303           sqlite3_uint64 u;
304           int j;
305           unsigned char x[8];
306           sqlite3_int64 v = sqlite3_column_int64(pStmt,i);
307           memcpy(&u, &v, 8);
308           for(j=7; j>=0; j--){
309             x[j] = u & 0xff;
310             u >>= 8;
311           }
312           hash_step((const unsigned char*)"1",1);
313           hash_step(x,8);
314           if( g.fDebug & DEBUG_FULLTRACE ){
315             fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i));
316           }
317           break;
318         }
319         case SQLITE_FLOAT: {
320           sqlite3_uint64 u;
321           int j;
322           unsigned char x[8];
323           double r = sqlite3_column_double(pStmt,i);
324           memcpy(&u, &r, 8);
325           for(j=7; j>=0; j--){
326             x[j] = u & 0xff;
327             u >>= 8;
328           }
329           hash_step((const unsigned char*)"2",1);
330           hash_step(x,8);
331           if( g.fDebug & DEBUG_FULLTRACE ){
332             fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i));
333           }
334           break;
335         }
336         case SQLITE_TEXT: {
337           int n = sqlite3_column_bytes(pStmt, i);
338           const unsigned char *z = sqlite3_column_text(pStmt, i);
339           hash_step((const unsigned char*)"3", 1);
340           hash_step(z, n);
341           if( g.fDebug & DEBUG_FULLTRACE ){
342             fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i));
343           }
344           break;
345         }
346         case SQLITE_BLOB: {
347           int n = sqlite3_column_bytes(pStmt, i);
348           const unsigned char *z = sqlite3_column_blob(pStmt, i);
349           hash_step((const unsigned char*)"4", 1);
350           hash_step(z, n);
351           if( g.fDebug & DEBUG_FULLTRACE ){
352             fprintf(stderr, "BLOB (%d bytes)\n", n);
353           }
354           break;
355         }
356       }
357     }
358   }
359   sqlite3_finalize(pStmt);
360 }
361 
362 
363 /*
364 ** Print sketchy documentation for this utility program
365 */
showHelp(void)366 static void showHelp(void){
367   printf("Usage: %s [options] FILE ...\n", g.zArgv0);
368   printf(
369 "Compute a SHA1 hash on the content of database FILE.  System tables such as\n"
370 "sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n"
371 "Options:\n"
372 "   --debug N           Set debugging flags to N (experts only)\n"
373 "   --like PATTERN      Only hash tables whose name is LIKE the pattern\n"
374 "   --schema-only       Only hash the schema - omit table content\n"
375 "   --without-schema    Only hash table content - omit the schema\n"
376   );
377 }
378 
main(int argc,char ** argv)379 int main(int argc, char **argv){
380   const char *zDb = 0;         /* Name of the database currently being hashed */
381   int i;                       /* Loop counter */
382   int rc;                      /* Subroutine return code */
383   char *zErrMsg;               /* Error message when opening database */
384   sqlite3_stmt *pStmt;         /* An SQLite query */
385   const char *zLike = 0;       /* LIKE pattern of tables to hash */
386   int omitSchema = 0;          /* True to compute hash on content only */
387   int omitContent = 0;         /* True to compute hash on schema only */
388   int nFile = 0;               /* Number of input filenames seen */
389 
390   g.zArgv0 = argv[0];
391   sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
392   for(i=1; i<argc; i++){
393     const char *z = argv[i];
394     if( z[0]=='-' ){
395       z++;
396       if( z[0]=='-' ) z++;
397       if( strcmp(z,"debug")==0 ){
398         if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
399         g.fDebug = strtol(argv[++i], 0, 0);
400       }else
401       if( strcmp(z,"help")==0 ){
402         showHelp();
403         return 0;
404       }else
405       if( strcmp(z,"like")==0 ){
406         if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
407         if( zLike!=0 ) cmdlineError("only one --like allowed");
408         zLike = argv[++i];
409       }else
410       if( strcmp(z,"schema-only")==0 ){
411         omitContent = 1;
412       }else
413       if( strcmp(z,"without-schema")==0 ){
414         omitSchema = 1;
415       }else
416       {
417         cmdlineError("unknown option: %s", argv[i]);
418       }
419     }else{
420       nFile++;
421       if( nFile<i ) argv[nFile] = argv[i];
422     }
423   }
424   if( nFile==0 ){
425     cmdlineError("no input files specified - nothing to do");
426   }
427   if( omitSchema && omitContent ){
428     cmdlineError("only one of --without-schema and --omit-schema allowed");
429   }
430   if( zLike==0 ) zLike = "%";
431 
432   for(i=1; i<=nFile; i++){
433     static const int openFlags =
434        SQLITE_OPEN_READWRITE |     /* Read/write so hot journals can recover */
435        SQLITE_OPEN_URI
436     ;
437     zDb = argv[i];
438     rc = sqlite3_open_v2(zDb, &g.db, openFlags, 0);
439     if( rc ){
440       fprintf(stderr, "cannot open database file '%s'\n", zDb);
441       continue;
442     }
443     rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_schema", 0, 0, &zErrMsg);
444     if( rc || zErrMsg ){
445       sqlite3_close(g.db);
446       g.db = 0;
447       fprintf(stderr, "'%s' is not a valid SQLite database\n", zDb);
448       continue;
449     }
450 
451     /* Start the hash */
452     hash_init();
453 
454     /* Hash table content */
455     if( !omitContent ){
456       pStmt = db_prepare(
457         "SELECT name FROM sqlite_schema\n"
458         " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
459         "   AND name NOT LIKE 'sqlite_%%'\n"
460         "   AND name LIKE '%q'\n"
461         " ORDER BY name COLLATE nocase;\n",
462         zLike
463       );
464       while( SQLITE_ROW==sqlite3_step(pStmt) ){
465         /* We want rows of the table to be hashed in PRIMARY KEY order.
466         ** Technically, an ORDER BY clause is required to guarantee that
467         ** order.  However, though not guaranteed by the documentation, every
468         ** historical version of SQLite has always output rows in PRIMARY KEY
469         ** order when there is no WHERE or GROUP BY clause, so the ORDER BY
470         ** can be safely omitted. */
471         hash_one_query("SELECT * FROM \"%w\"", sqlite3_column_text(pStmt,0));
472       }
473       sqlite3_finalize(pStmt);
474     }
475 
476     /* Hash the database schema */
477     if( !omitSchema ){
478       hash_one_query(
479          "SELECT type, name, tbl_name, sql FROM sqlite_schema\n"
480          " WHERE tbl_name LIKE '%q'\n"
481          " ORDER BY name COLLATE nocase;\n",
482          zLike
483       );
484     }
485 
486     /* Finish and output the hash and close the database connection. */
487     hash_finish(zDb);
488     sqlite3_close(g.db);
489   }
490   return 0;
491 }
492