xref: /sqlite-3.40.0/tool/dbhash.c (revision 290fcaa2)
1 /*
2 ** 2016-06-07
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This is a utility program that computes a hash on the content
14 ** of an SQLite database.
15 **
16 ** The hash is computed over just the content of the database.  Free
17 ** space inside of the database file, and alternative on-disk representations
18 ** of the same content (ex: UTF8 vs UTF16) do not affect the hash.  So,
19 ** for example, the database file page size, encoding, and auto_vacuum setting
20 ** can all be changed without changing the hash.
21 */
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdarg.h>
25 #include <ctype.h>
26 #include <string.h>
27 #include <assert.h>
28 #include "sqlite3.h"
29 
/*
** Context for the SHA1 hash.
**
** One instance holds everything needed to resume hashing between calls:
** the five-word chaining value, the running message length, and the bytes
** of the current block that have not yet been run through the transform.
*/
typedef struct SHA1Context {
  unsigned int state[5];     /* Chaining value, updated once per 64-byte block */
  unsigned int count[2];     /* Message length in bits: [0]=low word, [1]=high */
  unsigned char buffer[64];  /* Input bytes waiting for a complete block */
} SHA1Context;
37 
/*
** All global variables are gathered into the "g" singleton.
**
** Because "g" has static storage duration every field starts out as zero;
** main() fills in the fields from the command line before any hashing
** is done.
*/
struct GlobalVars {
  const char *zArgv0;       /* Name of program: argv[0], prefix of error text */
  int bSchemaPK;            /* Use the schema-defined PK, not the true PK.
                            ** Set to 1 by the -primarykey option. */
  unsigned fDebug;          /* Debug flags: numeric argument of -debug */
  sqlite3 *db;              /* The database connection opened by main() */
  SHA1Context cx;           /* SHA1 hash context shared by hash_init(),
                            ** hash_step() and hash_finish() */
} g;
48 
49 /******************************************************************************
50 ** The Hash Engine
51 **
52 ** Modify these routines (and appropriate state fields in global variable 'g')
53 ** in order to compute a different (better?) hash of the database.
54 */
55 /*
56  * blk0() and blk() perform the initial expand.
57  * I got the idea of expanding during the round function from SSLeay
58  *
59  * blk0le() for little-endian and blk0be() for big-endian.
60  */
/*
 * 32-bit rotate primitives used by the SHA1 macros below.
 *
 *   rol(x,k)   rotate x left  by k bits
 *   ror(x,k)   rotate x right by k bits
 *
 * x must be a 32-bit unsigned value and k must be in the range 1..31
 * (k==0 would shift by the full width of the type, which is undefined).
 * x is evaluated twice, so it must not have side effects.
 *
 * This is plain ISO C.  An earlier variant used GNU inline assembly on
 * x86, but that relies on the "asm" keyword and on statement expressions,
 * neither of which is available when compiling in a strict ISO mode such
 * as -std=c99 or -std=c11.  Current compilers recognise the shift/or
 * idiom and emit a single rotate instruction for it anyway.
 */
#define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r))
#define rol(x,k) SHA_ROT(x,k,32-(k))
#define ror(x,k) SHA_ROT(x,32-(k),k)
78 
79 
/*
 * Message-schedule helpers.  All three operate on a local array named
 * "block" (16 words of 32 bits) that must be in scope at the point of use.
 *
 *   blk0le(i)  byte-reverse word i in place and yield the new value
 *              (little-endian hosts, rounds 0..15)
 *   blk0be(i)  yield word i unchanged (big-endian hosts, rounds 0..15)
 *   blk(i)     replace word (i mod 16) with the 1-bit left rotation of the
 *              XOR of words i+13, i+8, i+2 and i (all mod 16) and yield it
 */
#define blk0le(i) \
    (block[(i)] = (ror(block[(i)],8)&0xFF00FF00) | (rol(block[(i)],8)&0x00FF00FF))
#define blk0be(i) (block[(i)])
#define blk(i) \
    (block[(i)&15] = rol(block[((i)+13)&15] ^ block[((i)+8)&15] \
                         ^ block[((i)+2)&15] ^ block[(i)&15], 1))
85 
/*
 * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1
 *
 * Rl0() for little-endian and Rb0() for big-endian.  Endianness is
 * determined at run-time.
 *
 * Every operation has the same shape and differs only in the mixing
 * function, the schedule word and the additive constant:
 *
 *     z += f(w,x,y) + schedule_word + constant + rol(v,5);
 *     w  = ror(w,2);
 *
 * SHA1_OP() spells that shape out once.  v, w and z must be modifiable
 * lvalues.  Each macro expands to two complete statements and carries its
 * own trailing semicolon.
 */
#define SHA1_OP(f,wd,k,v,w,z) \
    z += (f) + (wd) + (k) + rol(v,5); w = ror(w,2);

#define Rl0(v,w,x,y,z,i) SHA1_OP(((w&(x^y))^y),       blk0le(i), 0x5A827999, v,w,z)
#define Rb0(v,w,x,y,z,i) SHA1_OP(((w&(x^y))^y),       blk0be(i), 0x5A827999, v,w,z)
#define R1(v,w,x,y,z,i)  SHA1_OP(((w&(x^y))^y),       blk(i),    0x5A827999, v,w,z)
#define R2(v,w,x,y,z,i)  SHA1_OP((w^x^y),             blk(i),    0x6ED9EBA1, v,w,z)
#define R3(v,w,x,y,z,i)  SHA1_OP((((w|x)&y)|(w&x)),   blk(i),    0x8F1BBCDC, v,w,z)
#define R4(v,w,x,y,z,i)  SHA1_OP((w^x^y),             blk(i),    0xCA62C1D6, v,w,z)
104 
/*
 * Hash a single 512-bit block. This is the core of the algorithm.
 *
 *   state    Five-word running hash state.  It is read at the start and
 *            the result of the 80 operations is added back into it.
 *   buffer   The 64 input bytes that make up the block.  Not modified.
 *
 * The five working variables are kept in the array qq[] and reached
 * through the one-letter aliases a..e so that the unrolled invocations
 * of the round macros stay readable.  The aliases are #undef'd right
 * after the function; otherwise every later use of the identifiers
 * a, b, c, d or e in this file would silently be rewritten to qq[N].
 */
#define a qq[0]
#define b qq[1]
#define c qq[2]
#define d qq[3]
#define e qq[4]

void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){
  unsigned int qq[5];         /* Working variables a, b, c, d, e */
  static const int one = 1;   /* First byte is 1 only on little-endian hosts */
  unsigned int block[16];     /* The input block as 16 words; doubles as the
                              ** 16-entry rolling message schedule */

  /* Copy through memcpy() so that buffer needs no particular alignment */
  memcpy(block, buffer, 64);
  memcpy(qq,state,5*sizeof(unsigned int));

  /* 4 rounds of 20 operations each. Loop unrolled.
  **
  ** The first 16 operations consume the input words directly.  Those words
  ** were copied in memory order, so on a little-endian host each one is
  ** byte-swapped (Rl0) while a big-endian host uses them as they are (Rb0).
  */
  if( 1 == *(const unsigned char*)&one ){
    Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3);
    Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7);
    Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11);
    Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15);
  }else{
    Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3);
    Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7);
    Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11);
    Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15);
  }
  R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
  R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
  R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
  R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
  R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
  R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
  R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
  R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
  R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
  R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
  R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
  R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
  R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
  R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
  R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
  R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);

  /* Add the working vars back into context.state[] */
  state[0] += a;
  state[1] += b;
  state[2] += c;
  state[3] += d;
  state[4] += e;
}

#undef a
#undef b
#undef c
#undef d
#undef e
166 
167 
168 /* Initialize the SHA1 hash */
169 static void hash_init(void){
170   /* SHA1 initialization constants */
171   g.cx.state[0] = 0x67452301;
172   g.cx.state[1] = 0xEFCDAB89;
173   g.cx.state[2] = 0x98BADCFE;
174   g.cx.state[3] = 0x10325476;
175   g.cx.state[4] = 0xC3D2E1F0;
176   g.cx.count[0] = g.cx.count[1] = 0;
177 }
178 
179 /* Add new content to the SHA1 hash */
180 static void hash_step(const unsigned char *data,  unsigned int len){
181   unsigned int i, j;
182 
183   j = g.cx.count[0];
184   if( (g.cx.count[0] += len << 3) < j ){
185     g.cx.count[1] += (len>>29)+1;
186   }
187   j = (j >> 3) & 63;
188   if( (j + len) > 63 ){
189     (void)memcpy(&g.cx.buffer[j], data, (i = 64-j));
190     SHA1Transform(g.cx.state, g.cx.buffer);
191     for(; i + 63 < len; i += 64){
192       SHA1Transform(g.cx.state, &data[i]);
193     }
194     j = 0;
195   }else{
196     i = 0;
197   }
198   (void)memcpy(&g.cx.buffer[j], &data[i], len - i);
199 }
200 
201 
202 /* Add padding and compute and output the message digest. */
203 static void hash_finish(void){
204   unsigned int i;
205   unsigned char finalcount[8];
206   unsigned char digest[20];
207   static const char zEncode[] = "0123456789abcdef";
208   char zOut[40];
209 
210   for (i = 0; i < 8; i++){
211     finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)]
212        >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
213   }
214   hash_step((const unsigned char *)"\200", 1);
215   while ((g.cx.count[0] & 504) != 448){
216     hash_step((const unsigned char *)"\0", 1);
217   }
218   hash_step(finalcount, 8);  /* Should cause a SHA1Transform() */
219   for (i = 0; i < 20; i++){
220     digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
221   }
222   for(i=0; i<20; i++){
223     zOut[i*2] = zEncode[(digest[i]>>4)&0xf];
224     zOut[i*2+1] = zEncode[digest[i] & 0xf];
225   }
226   zOut[i*2]= 0;
227   printf("%s\n", zOut);
228 }
229 /* End of the hashing logic
230 *******************************************************************************/
231 
232 /*
233 ** Print an error resulting from faulting command-line arguments and
234 ** abort the program.
235 */
236 static void cmdlineError(const char *zFormat, ...){
237   va_list ap;
238   fprintf(stderr, "%s: ", g.zArgv0);
239   va_start(ap, zFormat);
240   vfprintf(stderr, zFormat, ap);
241   va_end(ap);
242   fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0);
243   exit(1);
244 }
245 
246 /*
247 ** Print an error message for an error that occurs at runtime, then
248 ** abort the program.
249 */
250 static void runtimeError(const char *zFormat, ...){
251   va_list ap;
252   fprintf(stderr, "%s: ", g.zArgv0);
253   va_start(ap, zFormat);
254   vfprintf(stderr, zFormat, ap);
255   va_end(ap);
256   fprintf(stderr, "\n");
257   exit(1);
258 }
259 
260 /*
261 ** Prepare a new SQL statement.  Print an error and abort if anything
262 ** goes wrong.
263 */
264 static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){
265   char *zSql;
266   int rc;
267   sqlite3_stmt *pStmt;
268 
269   zSql = sqlite3_vmprintf(zFormat, ap);
270   if( zSql==0 ) runtimeError("out of memory");
271   rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0);
272   if( rc ){
273     runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db),
274                  zSql);
275   }
276   sqlite3_free(zSql);
277   return pStmt;
278 }
279 static sqlite3_stmt *db_prepare(const char *zFormat, ...){
280   va_list ap;
281   sqlite3_stmt *pStmt;
282   va_start(ap, zFormat);
283   pStmt = db_vprepare(zFormat, ap);
284   va_end(ap);
285   return pStmt;
286 }
287 
288 /*
289 ** Compute the hash for a single table named zTab
290 */
291 static void hash_one_table(const char *zTab){
292   sqlite3_stmt *pStmt;
293   int nCol;
294   int i;
295   pStmt = db_prepare("SELECT * FROM \"%w\";", zTab);
296   nCol = sqlite3_column_count(pStmt);
297   while( SQLITE_ROW==sqlite3_step(pStmt) ){
298     for(i=0; i<nCol; i++){
299       switch( sqlite3_column_type(pStmt,i) ){
300         case SQLITE_NULL: {
301           hash_step((const unsigned char*)"0",1);
302           break;
303         }
304         case SQLITE_INTEGER: {
305           sqlite3_uint64 u;
306           int j;
307           unsigned char x[8];
308           sqlite3_int64 v = sqlite3_column_int64(pStmt,i);
309           memcpy(&u, &v, 8);
310           for(j=7; j>=0; j--){
311             x[j] = u & 0xff;
312             u >>= 8;
313           }
314           hash_step((const unsigned char*)"1",1);
315           hash_step(x,8);
316           break;
317         }
318         case SQLITE_FLOAT: {
319           sqlite3_uint64 u;
320           int j;
321           unsigned char x[8];
322           double r = sqlite3_column_double(pStmt,i);
323           memcpy(&u, &r, 8);
324           for(j=7; j>=0; j--){
325             x[j] = u & 0xff;
326             u >>= 8;
327           }
328           hash_step((const unsigned char*)"2",1);
329           hash_step(x,8);
330           break;
331         }
332         case SQLITE_TEXT: {
333           int n = sqlite3_column_bytes(pStmt, i);
334           const unsigned char *z = sqlite3_column_text(pStmt, i);
335           hash_step((const unsigned char*)"3", 1);
336           hash_step(z, n);
337           break;
338         }
339         case SQLITE_BLOB: {
340           int n = sqlite3_column_bytes(pStmt, i);
341           const unsigned char *z = sqlite3_column_blob(pStmt, i);
342           hash_step((const unsigned char*)"4", 1);
343           hash_step(z, n);
344           break;
345         }
346       }
347     }
348   }
349   sqlite3_finalize(pStmt);
350 }
351 
352 
353 /*
354 ** Print sketchy documentation for this utility program
355 */
356 static void showHelp(void){
357   printf("Usage: %s DB\n", g.zArgv0);
358   printf(
359 "Compute a hash on the content of database DB\n"
360   );
361 }
362 
363 int main(int argc, char **argv){
364   const char *zDb = 0;
365   int i;
366   int rc;
367   char *zErrMsg;
368   sqlite3_stmt *pStmt;
369 
370   g.zArgv0 = argv[0];
371   sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
372   for(i=1; i<argc; i++){
373     const char *z = argv[i];
374     if( z[0]=='-' ){
375       z++;
376       if( z[0]=='-' ) z++;
377       if( strcmp(z,"debug")==0 ){
378         if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
379         g.fDebug = strtol(argv[++i], 0, 0);
380       }else
381       if( strcmp(z,"help")==0 ){
382         showHelp();
383         return 0;
384       }else
385       if( strcmp(z,"primarykey")==0 ){
386         g.bSchemaPK = 1;
387       }else
388       {
389         cmdlineError("unknown option: %s", argv[i]);
390       }
391     }else if( zDb==0 ){
392       zDb = argv[i];
393     }else{
394       cmdlineError("unknown argument: %s", argv[i]);
395     }
396   }
397   if( zDb==0 ){
398     cmdlineError("database argument missing");
399   }
400   rc = sqlite3_open(zDb, &g.db);
401   if( rc ){
402     cmdlineError("cannot open database file \"%s\"", zDb);
403   }
404   rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, &zErrMsg);
405   if( rc || zErrMsg ){
406     cmdlineError("\"%s\" does not appear to be a valid SQLite database", zDb);
407   }
408 
409   /* Handle tables one by one */
410   pStmt = db_prepare(
411     "SELECT name FROM sqlite_master\n"
412     " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
413     "UNION SELECT 'sqlite_master' AS name\n"
414     " ORDER BY name;\n"
415   );
416   hash_init();
417   while( SQLITE_ROW==sqlite3_step(pStmt) ){
418     hash_one_table((const char*)sqlite3_column_text(pStmt,0));
419   }
420   hash_finish();
421 
422   sqlite3_close(g.db);
423   return 0;
424 }
425