Change EXPLAIN QUERY PLAN output to say "USE TEMP B-TREE FOR LAST TERM OF ORDER BY...
[sqlite.git] / tool / dbhash.c
blob78685dcd6c336ada44addc8561e02317af7da5e5
1 /*
2 ** 2016-06-07
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
13 ** This is a utility program that computes an SHA1 hash on the content
14 ** of an SQLite database.
16 ** The hash is computed over just the content of the database. Free
17 ** space inside of the database file, and alternative on-disk representations
18 ** of the same content (ex: UTF8 vs UTF16) do not affect the hash. So,
19 ** for example, the database file page size, encoding, and auto_vacuum setting
20 ** can all be changed without changing the hash.
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <stdarg.h>
25 #include <ctype.h>
26 #include <string.h>
27 #include <assert.h>
28 #include "sqlite3.h"
30 /* Context for the SHA1 hash */
31 typedef struct SHA1Context SHA1Context;
32 struct SHA1Context {
33 unsigned int state[5];
34 unsigned int count[2];
35 unsigned char buffer[64];
39 ** All global variables are gathered into the "g" singleton.
41 struct GlobalVars {
42 const char *zArgv0; /* Name of program */
43 unsigned fDebug; /* Debug flags */
44 sqlite3 *db; /* The database connection */
45 SHA1Context cx; /* SHA1 hash context */
46 } g;
49 ** Debugging flags
51 #define DEBUG_FULLTRACE 0x00000001 /* Trace hash to stderr */
53 /******************************************************************************
54 ** The Hash Engine
56 ** Modify these routines (and appropriate state fields in global variable 'g')
57 ** in order to compute a different (better?) hash of the database.
60 * blk0() and blk() perform the initial expand.
61 * I got the idea of expanding during the round function from SSLeay
63 * blk0le() for little-endian and blk0be() for big-endian.
65 #define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r))
66 #define rol(x,k) SHA_ROT(x,k,32-(k))
67 #define ror(x,k) SHA_ROT(x,32-(k),k)
69 #define blk0le(i) (block[i] = (ror(block[i],8)&0xFF00FF00) \
70 |(rol(block[i],8)&0x00FF00FF))
71 #define blk0be(i) block[i]
72 #define blk(i) (block[i&15] = rol(block[(i+13)&15]^block[(i+8)&15] \
73 ^block[(i+2)&15]^block[i&15],1))
76 * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1
78 * Rl0() for little-endian and Rb0() for big-endian. Endianness is
79 * determined at run-time.
81 #define Rl0(v,w,x,y,z,i) \
82 z+=((w&(x^y))^y)+blk0le(i)+0x5A827999+rol(v,5);w=ror(w,2);
83 #define Rb0(v,w,x,y,z,i) \
84 z+=((w&(x^y))^y)+blk0be(i)+0x5A827999+rol(v,5);w=ror(w,2);
85 #define R1(v,w,x,y,z,i) \
86 z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=ror(w,2);
87 #define R2(v,w,x,y,z,i) \
88 z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=ror(w,2);
89 #define R3(v,w,x,y,z,i) \
90 z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=ror(w,2);
91 #define R4(v,w,x,y,z,i) \
92 z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=ror(w,2);
95 * Hash a single 512-bit block. This is the core of the algorithm.
97 #define a qq[0]
98 #define b qq[1]
99 #define c qq[2]
100 #define d qq[3]
101 #define e qq[4]
103 void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){
104 unsigned int qq[5]; /* a, b, c, d, e; */
105 static int one = 1;
106 unsigned int block[16];
107 memcpy(block, buffer, 64);
108 memcpy(qq,state,5*sizeof(unsigned int));
110 /* Copy g.cx.state[] to working vars */
112 a = state[0];
113 b = state[1];
114 c = state[2];
115 d = state[3];
116 e = state[4];
119 /* 4 rounds of 20 operations each. Loop unrolled. */
120 if( 1 == *(unsigned char*)&one ){
121 Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3);
122 Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7);
123 Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11);
124 Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15);
125 }else{
126 Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3);
127 Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7);
128 Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11);
129 Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15);
131 R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
132 R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
133 R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
134 R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
135 R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
136 R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
137 R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
138 R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
139 R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
140 R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
141 R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
142 R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
143 R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
144 R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
145 R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
146 R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
148 /* Add the working vars back into context.state[] */
149 state[0] += a;
150 state[1] += b;
151 state[2] += c;
152 state[3] += d;
153 state[4] += e;
157 /* Initialize the SHA1 hash */
158 static void hash_init(void){
159 /* SHA1 initialization constants */
160 g.cx.state[0] = 0x67452301;
161 g.cx.state[1] = 0xEFCDAB89;
162 g.cx.state[2] = 0x98BADCFE;
163 g.cx.state[3] = 0x10325476;
164 g.cx.state[4] = 0xC3D2E1F0;
165 g.cx.count[0] = g.cx.count[1] = 0;
168 /* Add new content to the SHA1 hash */
169 static void hash_step(const unsigned char *data, unsigned int len){
170 unsigned int i, j;
172 j = g.cx.count[0];
173 if( (g.cx.count[0] += len << 3) < j ){
174 g.cx.count[1] += (len>>29)+1;
176 j = (j >> 3) & 63;
177 if( (j + len) > 63 ){
178 (void)memcpy(&g.cx.buffer[j], data, (i = 64-j));
179 SHA1Transform(g.cx.state, g.cx.buffer);
180 for(; i + 63 < len; i += 64){
181 SHA1Transform(g.cx.state, &data[i]);
183 j = 0;
184 }else{
185 i = 0;
187 (void)memcpy(&g.cx.buffer[j], &data[i], len - i);
191 /* Add padding and compute and output the message digest. */
192 static void hash_finish(const char *zName){
193 unsigned int i;
194 unsigned char finalcount[8];
195 unsigned char digest[20];
196 static const char zEncode[] = "0123456789abcdef";
197 char zOut[41];
199 for (i = 0; i < 8; i++){
200 finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)]
201 >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
203 hash_step((const unsigned char *)"\200", 1);
204 while ((g.cx.count[0] & 504) != 448){
205 hash_step((const unsigned char *)"\0", 1);
207 hash_step(finalcount, 8); /* Should cause a SHA1Transform() */
208 for (i = 0; i < 20; i++){
209 digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
211 for(i=0; i<20; i++){
212 zOut[i*2] = zEncode[(digest[i]>>4)&0xf];
213 zOut[i*2+1] = zEncode[digest[i] & 0xf];
215 zOut[i*2]= 0;
216 printf("%s %s\n", zOut, zName);
218 /* End of the hashing logic
219 *******************************************************************************/
222 ** Print an error resulting from faulting command-line arguments and
223 ** abort the program.
225 static void cmdlineError(const char *zFormat, ...){
226 va_list ap;
227 fprintf(stderr, "%s: ", g.zArgv0);
228 va_start(ap, zFormat);
229 vfprintf(stderr, zFormat, ap);
230 va_end(ap);
231 fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0);
232 exit(1);
236 ** Print an error message for an error that occurs at runtime, then
237 ** abort the program.
239 static void runtimeError(const char *zFormat, ...){
240 va_list ap;
241 fprintf(stderr, "%s: ", g.zArgv0);
242 va_start(ap, zFormat);
243 vfprintf(stderr, zFormat, ap);
244 va_end(ap);
245 fprintf(stderr, "\n");
246 exit(1);
250 ** Prepare a new SQL statement. Print an error and abort if anything
251 ** goes wrong.
253 static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){
254 char *zSql;
255 int rc;
256 sqlite3_stmt *pStmt;
258 zSql = sqlite3_vmprintf(zFormat, ap);
259 if( zSql==0 ) runtimeError("out of memory");
260 rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0);
261 if( rc ){
262 runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db),
263 zSql);
265 sqlite3_free(zSql);
266 return pStmt;
268 static sqlite3_stmt *db_prepare(const char *zFormat, ...){
269 va_list ap;
270 sqlite3_stmt *pStmt;
271 va_start(ap, zFormat);
272 pStmt = db_vprepare(zFormat, ap);
273 va_end(ap);
274 return pStmt;
278 ** Compute the hash for all rows of the query formed from the printf-style
279 ** zFormat and its argument.
281 static void hash_one_query(const char *zFormat, ...){
282 va_list ap;
283 sqlite3_stmt *pStmt; /* The query defined by zFormat and "..." */
284 int nCol; /* Number of columns in the result set */
285 int i; /* Loop counter */
287 /* Prepare the query defined by zFormat and "..." */
288 va_start(ap, zFormat);
289 pStmt = db_vprepare(zFormat, ap);
290 va_end(ap);
291 nCol = sqlite3_column_count(pStmt);
293 /* Compute a hash over the result of the query */
294 while( SQLITE_ROW==sqlite3_step(pStmt) ){
295 for(i=0; i<nCol; i++){
296 switch( sqlite3_column_type(pStmt,i) ){
297 case SQLITE_NULL: {
298 hash_step((const unsigned char*)"0",1);
299 if( g.fDebug & DEBUG_FULLTRACE ) fprintf(stderr, "NULL\n");
300 break;
302 case SQLITE_INTEGER: {
303 sqlite3_uint64 u;
304 int j;
305 unsigned char x[8];
306 sqlite3_int64 v = sqlite3_column_int64(pStmt,i);
307 memcpy(&u, &v, 8);
308 for(j=7; j>=0; j--){
309 x[j] = u & 0xff;
310 u >>= 8;
312 hash_step((const unsigned char*)"1",1);
313 hash_step(x,8);
314 if( g.fDebug & DEBUG_FULLTRACE ){
315 fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i));
317 break;
319 case SQLITE_FLOAT: {
320 sqlite3_uint64 u;
321 int j;
322 unsigned char x[8];
323 double r = sqlite3_column_double(pStmt,i);
324 memcpy(&u, &r, 8);
325 for(j=7; j>=0; j--){
326 x[j] = u & 0xff;
327 u >>= 8;
329 hash_step((const unsigned char*)"2",1);
330 hash_step(x,8);
331 if( g.fDebug & DEBUG_FULLTRACE ){
332 fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i));
334 break;
336 case SQLITE_TEXT: {
337 int n = sqlite3_column_bytes(pStmt, i);
338 const unsigned char *z = sqlite3_column_text(pStmt, i);
339 hash_step((const unsigned char*)"3", 1);
340 hash_step(z, n);
341 if( g.fDebug & DEBUG_FULLTRACE ){
342 fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i));
344 break;
346 case SQLITE_BLOB: {
347 int n = sqlite3_column_bytes(pStmt, i);
348 const unsigned char *z = sqlite3_column_blob(pStmt, i);
349 hash_step((const unsigned char*)"4", 1);
350 hash_step(z, n);
351 if( g.fDebug & DEBUG_FULLTRACE ){
352 fprintf(stderr, "BLOB (%d bytes)\n", n);
354 break;
359 sqlite3_finalize(pStmt);
364 ** Print sketchy documentation for this utility program
366 static void showHelp(void){
367 printf("Usage: %s [options] FILE ...\n", g.zArgv0);
368 printf(
369 "Compute a SHA1 hash on the content of database FILE. System tables such as\n"
370 "sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n"
371 "Options:\n"
372 " --debug N Set debugging flags to N (experts only)\n"
373 " --like PATTERN Only hash tables whose name is LIKE the pattern\n"
374 " --schema-only Only hash the schema - omit table content\n"
375 " --without-schema Only hash table content - omit the schema\n"
379 int main(int argc, char **argv){
380 const char *zDb = 0; /* Name of the database currently being hashed */
381 int i; /* Loop counter */
382 int rc; /* Subroutine return code */
383 char *zErrMsg; /* Error message when opening database */
384 sqlite3_stmt *pStmt; /* An SQLite query */
385 const char *zLike = 0; /* LIKE pattern of tables to hash */
386 int omitSchema = 0; /* True to compute hash on content only */
387 int omitContent = 0; /* True to compute hash on schema only */
388 int nFile = 0; /* Number of input filenames seen */
390 g.zArgv0 = argv[0];
391 sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
392 for(i=1; i<argc; i++){
393 const char *z = argv[i];
394 if( z[0]=='-' ){
395 z++;
396 if( z[0]=='-' ) z++;
397 if( strcmp(z,"debug")==0 ){
398 if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
399 g.fDebug = strtol(argv[++i], 0, 0);
400 }else
401 if( strcmp(z,"help")==0 ){
402 showHelp();
403 return 0;
404 }else
405 if( strcmp(z,"like")==0 ){
406 if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
407 if( zLike!=0 ) cmdlineError("only one --like allowed");
408 zLike = argv[++i];
409 }else
410 if( strcmp(z,"schema-only")==0 ){
411 omitContent = 1;
412 }else
413 if( strcmp(z,"without-schema")==0 ){
414 omitSchema = 1;
415 }else
417 cmdlineError("unknown option: %s", argv[i]);
419 }else{
420 nFile++;
421 if( nFile<i ) argv[nFile] = argv[i];
424 if( nFile==0 ){
425 cmdlineError("no input files specified - nothing to do");
427 if( omitSchema && omitContent ){
428 cmdlineError("only one of --without-schema and --omit-schema allowed");
430 if( zLike==0 ) zLike = "%";
432 for(i=1; i<=nFile; i++){
433 static const int openFlags =
434 SQLITE_OPEN_READWRITE | /* Read/write so hot journals can recover */
435 SQLITE_OPEN_URI
437 zDb = argv[i];
438 rc = sqlite3_open_v2(zDb, &g.db, openFlags, 0);
439 if( rc ){
440 fprintf(stderr, "cannot open database file '%s'\n", zDb);
441 continue;
443 rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_schema", 0, 0, &zErrMsg);
444 if( rc || zErrMsg ){
445 sqlite3_close(g.db);
446 g.db = 0;
447 fprintf(stderr, "'%s' is not a valid SQLite database\n", zDb);
448 continue;
451 /* Start the hash */
452 hash_init();
454 /* Hash table content */
455 if( !omitContent ){
456 pStmt = db_prepare(
457 "SELECT name FROM sqlite_schema\n"
458 " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
459 " AND name NOT LIKE 'sqlite_%%'\n"
460 " AND name LIKE '%q'\n"
461 " ORDER BY name COLLATE nocase;\n",
462 zLike
464 while( SQLITE_ROW==sqlite3_step(pStmt) ){
465 /* We want rows of the table to be hashed in PRIMARY KEY order.
466 ** Technically, an ORDER BY clause is required to guarantee that
467 ** order. However, though not guaranteed by the documentation, every
468 ** historical version of SQLite has always output rows in PRIMARY KEY
469 ** order when there is no WHERE or GROUP BY clause, so the ORDER BY
470 ** can be safely omitted. */
471 hash_one_query("SELECT * FROM \"%w\"", sqlite3_column_text(pStmt,0));
473 sqlite3_finalize(pStmt);
476 /* Hash the database schema */
477 if( !omitSchema ){
478 hash_one_query(
479 "SELECT type, name, tbl_name, sql FROM sqlite_schema\n"
480 " WHERE tbl_name LIKE '%q'\n"
481 " ORDER BY name COLLATE nocase;\n",
482 zLike
486 /* Finish and output the hash and close the database connection. */
487 hash_finish(zDb);
488 sqlite3_close(g.db);
490 return 0;