xref: /sqlite-3.40.0/test/wordcount.c (revision 482dc645)
19ac3c1eaSdrh /*
29ac3c1eaSdrh ** This C program extracts all "words" from an input document and adds them
39ac3c1eaSdrh ** to an SQLite database.  A "word" is any contiguous sequence of alphabetic
49ac3c1eaSdrh ** characters.  All digits, punctuation, and whitespace characters are
59ac3c1eaSdrh ** word separators.  The database stores a single entry for each distinct
69ac3c1eaSdrh ** word together with a count of the number of occurrences of that word.
79ac3c1eaSdrh ** A fresh database is created automatically on each run.
89ac3c1eaSdrh **
99ac3c1eaSdrh **     wordcount DATABASE INPUTFILE
109ac3c1eaSdrh **
119ac3c1eaSdrh ** The INPUTFILE name can be omitted, in which case input it taken from
129ac3c1eaSdrh ** standard input.
139ac3c1eaSdrh **
149ac3c1eaSdrh ** Option:
159ac3c1eaSdrh **
169ac3c1eaSdrh **
179ac3c1eaSdrh ** Modes:
189ac3c1eaSdrh **
199ac3c1eaSdrh ** Insert mode means:
209ac3c1eaSdrh **    (1) INSERT OR IGNORE INTO wordcount VALUES($new,1)
219ac3c1eaSdrh **    (2) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new -- if (1) is a noop
229ac3c1eaSdrh **
239ac3c1eaSdrh ** Update mode means:
249ac3c1eaSdrh **    (1) INSERT OR IGNORE INTO wordcount VALUES($new,0)
259ac3c1eaSdrh **    (2) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new
269ac3c1eaSdrh **
279ac3c1eaSdrh ** Replace mode means:
289ac3c1eaSdrh **    (1) REPLACE INTO wordcount
299ac3c1eaSdrh **        VALUES($new,ifnull((SELECT cnt FROM wordcount WHERE word=$new),0)+1);
309ac3c1eaSdrh **
31*482dc645Sdrh ** Upsert mode means:
32*482dc645Sdrh **    (1) INSERT INTO wordcount VALUES($new,1)
33*482dc645Sdrh **            ON CONFLICT(word) DO UPDATE SET cnt=cnt+1
34*482dc645Sdrh **
35ac873261Sdrh ** Select mode means:
36ac873261Sdrh **    (1) SELECT 1 FROM wordcount WHERE word=$new
379ac3c1eaSdrh **    (2) INSERT INTO wordcount VALUES($new,1) -- if (1) returns nothing
389ac3c1eaSdrh **    (3) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new  --if (1) return TRUE
399ac3c1eaSdrh **
40ac873261Sdrh ** Delete mode means:
41ac873261Sdrh **    (1) DELETE FROM wordcount WHERE word=$new
42ac873261Sdrh **
43776f3a2fSdrh ** Query mode means:
44776f3a2fSdrh **    (1) SELECT cnt FROM wordcount WHERE word=$new
45776f3a2fSdrh **
46776f3a2fSdrh ** Note that delete mode and query mode are only useful for preexisting
47776f3a2fSdrh ** databases.  The wordcount table is created using IF NOT EXISTS so this
48776f3a2fSdrh ** utility can be run multiple times on the same database file.  The
49776f3a2fSdrh ** --without-rowid, --nocase, and --pagesize parameters are only effective
50776f3a2fSdrh ** when creating a new database and are harmless no-ops on preexisting
51776f3a2fSdrh ** databases.
52ac873261Sdrh **
539ac3c1eaSdrh ******************************************************************************
549ac3c1eaSdrh **
559ac3c1eaSdrh ** Compile as follows:
569ac3c1eaSdrh **
579ac3c1eaSdrh **    gcc -I. wordcount.c sqlite3.c -ldl -lpthreads
589ac3c1eaSdrh **
599ac3c1eaSdrh ** Or:
609ac3c1eaSdrh **
619ac3c1eaSdrh **    gcc -I. -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
629ac3c1eaSdrh **        wordcount.c sqlite3.c
639ac3c1eaSdrh */
649ac3c1eaSdrh #include <stdio.h>
659ac3c1eaSdrh #include <string.h>
669ac3c1eaSdrh #include <ctype.h>
679ac3c1eaSdrh #include <stdlib.h>
689ac3c1eaSdrh #include <stdarg.h>
699ac3c1eaSdrh #include "sqlite3.h"
70e2b45d17Sdrh #ifndef _WIN32
71e2b45d17Sdrh # include <unistd.h>
72e2b45d17Sdrh #else
73e2b45d17Sdrh # include <io.h>
74e2b45d17Sdrh #endif
75c56fac74Sdrh #define ISALPHA(X) isalpha((unsigned char)(X))
769ac3c1eaSdrh 
77e2b45d17Sdrh const char zHelp[] =
78e2b45d17Sdrh "Usage: wordcount [OPTIONS] DATABASE [INPUT]\n"
79e2b45d17Sdrh " --all                Repeat the test for all test modes\n"
80e2b45d17Sdrh " --cachesize NNN      Use a cache size of NNN\n"
81e2b45d17Sdrh " --commit NNN         Commit after every NNN operations\n"
82e2b45d17Sdrh " --delete             Use DELETE mode\n"
83e2b45d17Sdrh " --insert             Use INSERT mode (the default)\n"
84e2b45d17Sdrh " --journal MMMM       Use PRAGMA journal_mode=MMMM\n"
85e2b45d17Sdrh " --nocase             Add the NOCASE collating sequence to the words.\n"
86e2b45d17Sdrh " --nosync             Use PRAGMA synchronous=OFF\n"
87e2b45d17Sdrh " --pagesize NNN       Use a page size of NNN\n"
88e2b45d17Sdrh " --query              Use QUERY mode\n"
89e2b45d17Sdrh " --replace            Use REPLACE mode\n"
90e2b45d17Sdrh " --select             Use SELECT mode\n"
91e2b45d17Sdrh " --stats              Show sqlite3_status() results at the end.\n"
92e2b45d17Sdrh " --summary            Show summary information on the collected data.\n"
93e2b45d17Sdrh " --tag NAME           Tag all output using NAME.  Use only stdout.\n"
94e2b45d17Sdrh " --timer              Time the operation of this program\n"
95e2b45d17Sdrh " --trace              Enable sqlite3_trace() output.\n"
96e2b45d17Sdrh " --update             Use UPDATE mode\n"
97*482dc645Sdrh " --upsert             Use UPSERT mode\n"
98e2b45d17Sdrh " --without-rowid      Use a WITHOUT ROWID table to store the words.\n"
99e2b45d17Sdrh ;
100e2b45d17Sdrh 
10119eb77bbSdrh /* Output tag */
10219eb77bbSdrh char *zTag = "--";
10319eb77bbSdrh 
104d2b637c2Sdrh /* Return the current wall-clock time */
realTime(void)105d2b637c2Sdrh static sqlite3_int64 realTime(void){
106d2b637c2Sdrh   static sqlite3_vfs *clockVfs = 0;
107d2b637c2Sdrh   sqlite3_int64 t;
108d2b637c2Sdrh   if( clockVfs==0 ) clockVfs = sqlite3_vfs_find(0);
109d2b637c2Sdrh   if( clockVfs->iVersion>=1 && clockVfs->xCurrentTimeInt64!=0 ){
110d2b637c2Sdrh     clockVfs->xCurrentTimeInt64(clockVfs, &t);
111d2b637c2Sdrh   }else{
112d2b637c2Sdrh     double r;
113d2b637c2Sdrh     clockVfs->xCurrentTime(clockVfs, &r);
114d2b637c2Sdrh     t = (sqlite3_int64)(r*86400000.0);
115d2b637c2Sdrh   }
116d2b637c2Sdrh   return t;
117d2b637c2Sdrh }
118d2b637c2Sdrh 
1199ac3c1eaSdrh /* Print an error message and exit */
fatal_error(const char * zMsg,...)1209ac3c1eaSdrh static void fatal_error(const char *zMsg, ...){
1219ac3c1eaSdrh   va_list ap;
1229ac3c1eaSdrh   va_start(ap, zMsg);
1239ac3c1eaSdrh   vfprintf(stderr, zMsg, ap);
1249ac3c1eaSdrh   va_end(ap);
1259ac3c1eaSdrh   exit(1);
1269ac3c1eaSdrh }
1279ac3c1eaSdrh 
128e2b45d17Sdrh /* Print a usage message and quit */
usage(void)129e2b45d17Sdrh static void usage(void){
130e2b45d17Sdrh   printf("%s",zHelp);
131e2b45d17Sdrh   exit(0);
132e2b45d17Sdrh }
133e2b45d17Sdrh 
1349ac3c1eaSdrh /* The sqlite3_trace() callback function */
traceCallback(void * NotUsed,const char * zSql)1359ac3c1eaSdrh static void traceCallback(void *NotUsed, const char *zSql){
1369ac3c1eaSdrh   printf("%s;\n", zSql);
1379ac3c1eaSdrh }
1389ac3c1eaSdrh 
1390afa99e3Sdrh /* An sqlite3_exec() callback that prints results on standard output,
1400afa99e3Sdrh ** each column separated by a single space. */
printResult(void * NotUsed,int nArg,char ** azArg,char ** azNm)1410afa99e3Sdrh static int printResult(void *NotUsed, int nArg, char **azArg, char **azNm){
1420afa99e3Sdrh   int i;
14319eb77bbSdrh   printf("%s", zTag);
1440afa99e3Sdrh   for(i=0; i<nArg; i++){
14555fcab39Sdrh     printf(" %s", azArg[i] ? azArg[i] : "(null)");
1460afa99e3Sdrh   }
1470afa99e3Sdrh   printf("\n");
1480afa99e3Sdrh   return 0;
1490afa99e3Sdrh }
1500afa99e3Sdrh 
1510afa99e3Sdrh 
15224f1985aSdrh /*
15324f1985aSdrh ** Add one character to a hash
15424f1985aSdrh */
addCharToHash(unsigned int * a,unsigned char x)15524f1985aSdrh static void addCharToHash(unsigned int *a, unsigned char x){
15624f1985aSdrh   if( a[0]<4 ){
15724f1985aSdrh     a[1] = (a[1]<<8) | x;
15824f1985aSdrh     a[0]++;
15924f1985aSdrh   }else{
16024f1985aSdrh     a[2] = (a[2]<<8) | x;
16124f1985aSdrh     a[0]++;
16224f1985aSdrh     if( a[0]==8 ){
16324f1985aSdrh       a[3] += a[1] + a[4];
16424f1985aSdrh       a[4] += a[2] + a[3];
16524f1985aSdrh       a[0] = a[1] = a[2] = 0;
16624f1985aSdrh     }
16724f1985aSdrh   }
16824f1985aSdrh }
16924f1985aSdrh 
17024f1985aSdrh /*
17124f1985aSdrh ** Compute the final hash value.
17224f1985aSdrh */
finalHash(unsigned int * a,char * z)17324f1985aSdrh static void finalHash(unsigned int *a, char *z){
17424f1985aSdrh   a[3] += a[1] + a[4] + a[0];
17524f1985aSdrh   a[4] += a[2] + a[3];
17624f1985aSdrh   sqlite3_snprintf(17, z, "%08x%08x", a[3], a[4]);
17724f1985aSdrh }
17824f1985aSdrh 
17924f1985aSdrh 
18024f1985aSdrh /*
18124f1985aSdrh ** Implementation of a checksum() aggregate SQL function
18224f1985aSdrh */
checksumStep(sqlite3_context * context,int argc,sqlite3_value ** argv)18324f1985aSdrh static void checksumStep(
18424f1985aSdrh   sqlite3_context *context,
18524f1985aSdrh   int argc,
18624f1985aSdrh   sqlite3_value **argv
18724f1985aSdrh ){
18824f1985aSdrh   const unsigned char *zVal;
18924f1985aSdrh   int nVal, i, j;
19024f1985aSdrh   unsigned int *a;
19124f1985aSdrh   a = (unsigned*)sqlite3_aggregate_context(context, sizeof(unsigned int)*5);
19224f1985aSdrh 
19324f1985aSdrh   if( a ){
19424f1985aSdrh     for(i=0; i<argc; i++){
19524f1985aSdrh       nVal = sqlite3_value_bytes(argv[i]);
19624f1985aSdrh       zVal = (const unsigned char*)sqlite3_value_text(argv[i]);
19724f1985aSdrh       if( zVal ) for(j=0; j<nVal; j++) addCharToHash(a, zVal[j]);
19824f1985aSdrh       addCharToHash(a, '|');
19924f1985aSdrh     }
20024f1985aSdrh     addCharToHash(a, '\n');
20124f1985aSdrh   }
20224f1985aSdrh }
checksumFinalize(sqlite3_context * context)20324f1985aSdrh static void checksumFinalize(sqlite3_context *context){
20424f1985aSdrh   unsigned int *a;
20524f1985aSdrh   char zResult[24];
20624f1985aSdrh   a = sqlite3_aggregate_context(context, 0);
20724f1985aSdrh   if( a ){
20824f1985aSdrh     finalHash(a, zResult);
20924f1985aSdrh     sqlite3_result_text(context, zResult, -1, SQLITE_TRANSIENT);
21024f1985aSdrh   }
21124f1985aSdrh }
21224f1985aSdrh 
2139ac3c1eaSdrh /* Define operating modes */
2149ac3c1eaSdrh #define MODE_INSERT     0
2159ac3c1eaSdrh #define MODE_REPLACE    1
216*482dc645Sdrh #define MODE_UPSERT     2
217*482dc645Sdrh #define MODE_SELECT     3
218*482dc645Sdrh #define MODE_UPDATE     4
219*482dc645Sdrh #define MODE_DELETE     5
220*482dc645Sdrh #define MODE_QUERY      6
221*482dc645Sdrh #define MODE_COUNT      7
222e2b45d17Sdrh #define MODE_ALL      (-1)
223e2b45d17Sdrh 
224e2b45d17Sdrh /* Mode names */
225e2b45d17Sdrh static const char *azMode[] = {
226e2b45d17Sdrh   "--insert",
227e2b45d17Sdrh   "--replace",
228*482dc645Sdrh   "--upsert",
229e2b45d17Sdrh   "--select",
230e2b45d17Sdrh   "--update",
231e2b45d17Sdrh   "--delete",
232e2b45d17Sdrh   "--query"
233e2b45d17Sdrh };
234e2b45d17Sdrh 
235e2b45d17Sdrh /*
236e2b45d17Sdrh ** Determine if another iteration of the test is required.  Return true
237e2b45d17Sdrh ** if so.  Return zero if all iterations have finished.
238e2b45d17Sdrh */
allLoop(int iMode,int * piLoopCnt,int * piMode2,int * pUseWithoutRowid)239e2b45d17Sdrh static int allLoop(
240e2b45d17Sdrh   int iMode,                /* The selected test mode */
241e2b45d17Sdrh   int *piLoopCnt,           /* Iteration loop counter */
242e2b45d17Sdrh   int *piMode2,             /* The test mode to use on the next iteration */
243e2b45d17Sdrh   int *pUseWithoutRowid     /* Whether or not to use --without-rowid */
244e2b45d17Sdrh ){
245e2b45d17Sdrh   int i;
246e2b45d17Sdrh   if( iMode!=MODE_ALL ){
247e2b45d17Sdrh     if( *piLoopCnt ) return 0;
248e2b45d17Sdrh     *piMode2 = iMode;
249e2b45d17Sdrh     *piLoopCnt = 1;
250e2b45d17Sdrh     return 1;
251e2b45d17Sdrh   }
252e2b45d17Sdrh   if( (*piLoopCnt)>=MODE_COUNT*2 ) return 0;
253e2b45d17Sdrh   i = (*piLoopCnt)++;
254e2b45d17Sdrh   *pUseWithoutRowid = i&1;
255e2b45d17Sdrh   *piMode2 = i>>1;
256e2b45d17Sdrh   return 1;
257e2b45d17Sdrh }
2589ac3c1eaSdrh 
main(int argc,char ** argv)2599ac3c1eaSdrh int main(int argc, char **argv){
2609ac3c1eaSdrh   const char *zFileToRead = 0;  /* Input file.  NULL for stdin */
2619ac3c1eaSdrh   const char *zDbName = 0;      /* Name of the database file to create */
2629ac3c1eaSdrh   int useWithoutRowid = 0;      /* True for --without-rowid */
2639ac3c1eaSdrh   int iMode = MODE_INSERT;      /* One of MODE_xxxxx */
264e2b45d17Sdrh   int iMode2;                   /* Mode to use for current --all iteration */
265e2b45d17Sdrh   int iLoopCnt = 0;             /* Which iteration when running --all */
2669ac3c1eaSdrh   int useNocase = 0;            /* True for --nocase */
2679ac3c1eaSdrh   int doTrace = 0;              /* True for --trace */
2680afa99e3Sdrh   int showStats = 0;            /* True for --stats */
2690afa99e3Sdrh   int showSummary = 0;          /* True for --summary */
270d2b637c2Sdrh   int showTimer = 0;            /* True for --timer */
271ac873261Sdrh   int cacheSize = 0;            /* Desired cache size.  0 means default */
272ac873261Sdrh   int pageSize = 0;             /* Desired page size.  0 means default */
273ac873261Sdrh   int commitInterval = 0;       /* How often to commit.  0 means never */
274ac873261Sdrh   int noSync = 0;               /* True for --nosync */
275ac873261Sdrh   const char *zJMode = 0;       /* Journal mode */
276ac873261Sdrh   int nOp = 0;                  /* Operation counter */
2779ac3c1eaSdrh   int i, j;                     /* Loop counters */
2789ac3c1eaSdrh   sqlite3 *db;                  /* The SQLite database connection */
2799ac3c1eaSdrh   char *zSql;                   /* Constructed SQL statement */
2809ac3c1eaSdrh   sqlite3_stmt *pInsert = 0;    /* The INSERT statement */
2819ac3c1eaSdrh   sqlite3_stmt *pUpdate = 0;    /* The UPDATE statement */
2829ac3c1eaSdrh   sqlite3_stmt *pSelect = 0;    /* The SELECT statement */
283ac873261Sdrh   sqlite3_stmt *pDelete = 0;    /* The DELETE statement */
2849ac3c1eaSdrh   FILE *in;                     /* The open input file */
2859ac3c1eaSdrh   int rc;                       /* Return code from an SQLite interface */
2869ac3c1eaSdrh   int iCur, iHiwtr;             /* Statistics values, current and "highwater" */
28719eb77bbSdrh   FILE *pTimer = stderr;        /* Output channel for the timer */
288776f3a2fSdrh   sqlite3_int64 sumCnt = 0;     /* Sum in QUERY mode */
289e2b45d17Sdrh   sqlite3_int64 startTime;      /* Time of start */
290e2b45d17Sdrh   sqlite3_int64 totalTime = 0;  /* Total time */
2919ac3c1eaSdrh   char zInput[2000];            /* A single line of input */
2929ac3c1eaSdrh 
2939ac3c1eaSdrh   /* Process command-line arguments */
2949ac3c1eaSdrh   for(i=1; i<argc; i++){
2959ac3c1eaSdrh     const char *z = argv[i];
2969ac3c1eaSdrh     if( z[0]=='-' ){
2979ac3c1eaSdrh       do{ z++; }while( z[0]=='-' );
2989ac3c1eaSdrh       if( strcmp(z,"without-rowid")==0 ){
2999ac3c1eaSdrh         useWithoutRowid = 1;
3009ac3c1eaSdrh       }else if( strcmp(z,"replace")==0 ){
3019ac3c1eaSdrh         iMode = MODE_REPLACE;
302*482dc645Sdrh       }else if( strcmp(z,"upsert")==0 ){
303*482dc645Sdrh         iMode = MODE_UPSERT;
3049ac3c1eaSdrh       }else if( strcmp(z,"select")==0 ){
3059ac3c1eaSdrh         iMode = MODE_SELECT;
3069ac3c1eaSdrh       }else if( strcmp(z,"insert")==0 ){
3079ac3c1eaSdrh         iMode = MODE_INSERT;
3089ac3c1eaSdrh       }else if( strcmp(z,"update")==0 ){
3099ac3c1eaSdrh         iMode = MODE_UPDATE;
310ac873261Sdrh       }else if( strcmp(z,"delete")==0 ){
311ac873261Sdrh         iMode = MODE_DELETE;
312776f3a2fSdrh       }else if( strcmp(z,"query")==0 ){
313776f3a2fSdrh         iMode = MODE_QUERY;
314e2b45d17Sdrh       }else if( strcmp(z,"all")==0 ){
315e2b45d17Sdrh         iMode = MODE_ALL;
316e2b45d17Sdrh         showTimer = -99;
3179ac3c1eaSdrh       }else if( strcmp(z,"nocase")==0 ){
3189ac3c1eaSdrh         useNocase = 1;
3199ac3c1eaSdrh       }else if( strcmp(z,"trace")==0 ){
3209ac3c1eaSdrh         doTrace = 1;
321ac873261Sdrh       }else if( strcmp(z,"nosync")==0 ){
322ac873261Sdrh         noSync = 1;
3230afa99e3Sdrh       }else if( strcmp(z,"stats")==0 ){
3240afa99e3Sdrh         showStats = 1;
3250afa99e3Sdrh       }else if( strcmp(z,"summary")==0 ){
3260afa99e3Sdrh         showSummary = 1;
327d2b637c2Sdrh       }else if( strcmp(z,"timer")==0 ){
328d2b637c2Sdrh         showTimer = i;
329ac873261Sdrh       }else if( strcmp(z,"cachesize")==0 && i<argc-1 ){
330ac873261Sdrh         i++;
331ac873261Sdrh         cacheSize = atoi(argv[i]);
332ac873261Sdrh       }else if( strcmp(z,"pagesize")==0 && i<argc-1 ){
333ac873261Sdrh         i++;
334ac873261Sdrh         pageSize = atoi(argv[i]);
335ac873261Sdrh       }else if( strcmp(z,"commit")==0 && i<argc-1 ){
336ac873261Sdrh         i++;
337ac873261Sdrh         commitInterval = atoi(argv[i]);
338ac873261Sdrh       }else if( strcmp(z,"journal")==0 && i<argc-1 ){
339ac873261Sdrh         zJMode = argv[++i];
34019eb77bbSdrh       }else if( strcmp(z,"tag")==0 && i<argc-1 ){
34119eb77bbSdrh         zTag = argv[++i];
34219eb77bbSdrh         pTimer = stdout;
343e2b45d17Sdrh       }else if( strcmp(z, "help")==0 || strcmp(z,"?")==0 ){
344e2b45d17Sdrh         usage();
3459ac3c1eaSdrh       }else{
346e2b45d17Sdrh         fatal_error("unknown option: \"%s\"\n"
347e2b45d17Sdrh                     "Use --help for a list of options\n",
348e2b45d17Sdrh                     argv[i]);
3499ac3c1eaSdrh       }
3509ac3c1eaSdrh     }else if( zDbName==0 ){
3519ac3c1eaSdrh       zDbName = argv[i];
3529ac3c1eaSdrh     }else if( zFileToRead==0 ){
3539ac3c1eaSdrh       zFileToRead = argv[i];
3549ac3c1eaSdrh     }else{
355e2b45d17Sdrh       fatal_error("surplus argument: \"%s\"\n", argv[i]);
3569ac3c1eaSdrh     }
3579ac3c1eaSdrh   }
3589ac3c1eaSdrh   if( zDbName==0 ){
359e2b45d17Sdrh     usage();
3609ac3c1eaSdrh   }
361d2b637c2Sdrh   startTime = realTime();
3629ac3c1eaSdrh 
3639ac3c1eaSdrh   /* Open the database and the input file */
364e2b45d17Sdrh   if( zDbName[0] && strcmp(zDbName,":memory:")!=0 ){
365e2b45d17Sdrh     unlink(zDbName);
366e2b45d17Sdrh   }
3679ac3c1eaSdrh   if( sqlite3_open(zDbName, &db) ){
3689ac3c1eaSdrh     fatal_error("Cannot open database file: %s\n", zDbName);
3699ac3c1eaSdrh   }
3709ac3c1eaSdrh   if( zFileToRead ){
3719ac3c1eaSdrh     in = fopen(zFileToRead, "rb");
3729ac3c1eaSdrh     if( in==0 ){
3739ac3c1eaSdrh       fatal_error("Could not open input file \"%s\"\n", zFileToRead);
3749ac3c1eaSdrh     }
3759ac3c1eaSdrh   }else{
376e2b45d17Sdrh     if( iMode==MODE_ALL ){
377e2b45d17Sdrh       fatal_error("The --all mode cannot be used with stdin\n");
378e2b45d17Sdrh     }
3799ac3c1eaSdrh     in = stdin;
3809ac3c1eaSdrh   }
3819ac3c1eaSdrh 
382ac873261Sdrh   /* Set database connection options */
3839ac3c1eaSdrh   if( doTrace ) sqlite3_trace(db, traceCallback, 0);
384ac873261Sdrh   if( pageSize ){
385ac873261Sdrh     zSql = sqlite3_mprintf("PRAGMA page_size=%d", pageSize);
386ac873261Sdrh     sqlite3_exec(db, zSql, 0, 0, 0);
387ac873261Sdrh     sqlite3_free(zSql);
388ac873261Sdrh   }
389ac873261Sdrh   if( cacheSize ){
390ac873261Sdrh     zSql = sqlite3_mprintf("PRAGMA cache_size=%d", cacheSize);
391ac873261Sdrh     sqlite3_exec(db, zSql, 0, 0, 0);
392ac873261Sdrh     sqlite3_free(zSql);
393ac873261Sdrh   }
394ac873261Sdrh   if( noSync ) sqlite3_exec(db, "PRAGMA synchronous=OFF", 0, 0, 0);
395ac873261Sdrh   if( zJMode ){
396ac873261Sdrh     zSql = sqlite3_mprintf("PRAGMA journal_mode=%s", zJMode);
397ac873261Sdrh     sqlite3_exec(db, zSql, 0, 0, 0);
398ac873261Sdrh     sqlite3_free(zSql);
399ac873261Sdrh   }
400ac873261Sdrh 
401e2b45d17Sdrh   iLoopCnt = 0;
402e2b45d17Sdrh   while( allLoop(iMode, &iLoopCnt, &iMode2, &useWithoutRowid) ){
403e2b45d17Sdrh     /* Delete prior content in --all mode */
404e2b45d17Sdrh     if( iMode==MODE_ALL ){
405e2b45d17Sdrh       if( sqlite3_exec(db, "DROP TABLE IF EXISTS wordcount; VACUUM;",0,0,0) ){
406e2b45d17Sdrh         fatal_error("Could not clean up prior iteration\n");
407e2b45d17Sdrh       }
408e2b45d17Sdrh       startTime = realTime();
409e2b45d17Sdrh       rewind(in);
410e2b45d17Sdrh     }
411ac873261Sdrh 
412ac873261Sdrh     /* Construct the "wordcount" table into which to put the words */
4139ac3c1eaSdrh     if( sqlite3_exec(db, "BEGIN IMMEDIATE", 0, 0, 0) ){
4149ac3c1eaSdrh       fatal_error("Could not start a transaction\n");
4159ac3c1eaSdrh     }
4169ac3c1eaSdrh     zSql = sqlite3_mprintf(
417ac873261Sdrh        "CREATE TABLE IF NOT EXISTS wordcount(\n"
4189ac3c1eaSdrh        "  word TEXT PRIMARY KEY COLLATE %s,\n"
4199ac3c1eaSdrh        "  cnt INTEGER\n"
4209ac3c1eaSdrh        ")%s",
4219ac3c1eaSdrh        useNocase ? "nocase" : "binary",
4229ac3c1eaSdrh        useWithoutRowid ? " WITHOUT ROWID" : ""
4239ac3c1eaSdrh     );
4249ac3c1eaSdrh     if( zSql==0 ) fatal_error("out of memory\n");
4259ac3c1eaSdrh     rc = sqlite3_exec(db, zSql, 0, 0, 0);
4269ac3c1eaSdrh     if( rc ) fatal_error("Could not create the wordcount table: %s.\n",
4279ac3c1eaSdrh                          sqlite3_errmsg(db));
4289ac3c1eaSdrh     sqlite3_free(zSql);
4299ac3c1eaSdrh 
4309ac3c1eaSdrh     /* Prepare SQL statements that will be needed */
431e2b45d17Sdrh     if( iMode2==MODE_QUERY ){
432776f3a2fSdrh       rc = sqlite3_prepare_v2(db,
433776f3a2fSdrh             "SELECT cnt FROM wordcount WHERE word=?1",
434776f3a2fSdrh             -1, &pSelect, 0);
435776f3a2fSdrh       if( rc ) fatal_error("Could not prepare the SELECT statement: %s\n",
436776f3a2fSdrh                             sqlite3_errmsg(db));
437776f3a2fSdrh     }
438e2b45d17Sdrh     if( iMode2==MODE_SELECT ){
4399ac3c1eaSdrh       rc = sqlite3_prepare_v2(db,
4409ac3c1eaSdrh             "SELECT 1 FROM wordcount WHERE word=?1",
4419ac3c1eaSdrh             -1, &pSelect, 0);
4429ac3c1eaSdrh       if( rc ) fatal_error("Could not prepare the SELECT statement: %s\n",
4439ac3c1eaSdrh                             sqlite3_errmsg(db));
4449ac3c1eaSdrh       rc = sqlite3_prepare_v2(db,
4459ac3c1eaSdrh             "INSERT INTO wordcount(word,cnt) VALUES(?1,1)",
4469ac3c1eaSdrh             -1, &pInsert, 0);
4479ac3c1eaSdrh       if( rc ) fatal_error("Could not prepare the INSERT statement: %s\n",
4489ac3c1eaSdrh                            sqlite3_errmsg(db));
4499ac3c1eaSdrh     }
450e2b45d17Sdrh     if( iMode2==MODE_SELECT || iMode2==MODE_UPDATE || iMode2==MODE_INSERT ){
4519ac3c1eaSdrh       rc = sqlite3_prepare_v2(db,
4529ac3c1eaSdrh             "UPDATE wordcount SET cnt=cnt+1 WHERE word=?1",
4539ac3c1eaSdrh             -1, &pUpdate, 0);
4549ac3c1eaSdrh       if( rc ) fatal_error("Could not prepare the UPDATE statement: %s\n",
4559ac3c1eaSdrh                            sqlite3_errmsg(db));
4569ac3c1eaSdrh     }
457e2b45d17Sdrh     if( iMode2==MODE_INSERT ){
4589ac3c1eaSdrh       rc = sqlite3_prepare_v2(db,
4599ac3c1eaSdrh             "INSERT OR IGNORE INTO wordcount(word,cnt) VALUES(?1,1)",
4609ac3c1eaSdrh             -1, &pInsert, 0);
4619ac3c1eaSdrh       if( rc ) fatal_error("Could not prepare the INSERT statement: %s\n",
4629ac3c1eaSdrh                            sqlite3_errmsg(db));
4639ac3c1eaSdrh     }
464e2b45d17Sdrh     if( iMode2==MODE_UPDATE ){
4659ac3c1eaSdrh       rc = sqlite3_prepare_v2(db,
4669ac3c1eaSdrh             "INSERT OR IGNORE INTO wordcount(word,cnt) VALUES(?1,0)",
4679ac3c1eaSdrh             -1, &pInsert, 0);
4689ac3c1eaSdrh       if( rc ) fatal_error("Could not prepare the INSERT statement: %s\n",
4699ac3c1eaSdrh                            sqlite3_errmsg(db));
4709ac3c1eaSdrh     }
471e2b45d17Sdrh     if( iMode2==MODE_REPLACE ){
4729ac3c1eaSdrh       rc = sqlite3_prepare_v2(db,
4739ac3c1eaSdrh           "REPLACE INTO wordcount(word,cnt)"
4749ac3c1eaSdrh           "VALUES(?1,coalesce((SELECT cnt FROM wordcount WHERE word=?1),0)+1)",
4759ac3c1eaSdrh           -1, &pInsert, 0);
4769ac3c1eaSdrh       if( rc ) fatal_error("Could not prepare the REPLACE statement: %s\n",
4779ac3c1eaSdrh                             sqlite3_errmsg(db));
4789ac3c1eaSdrh     }
479*482dc645Sdrh     if( iMode2==MODE_UPSERT ){
480*482dc645Sdrh       rc = sqlite3_prepare_v2(db,
481*482dc645Sdrh           "INSERT INTO wordcount(word,cnt) VALUES(?1,1) "
482*482dc645Sdrh           "ON CONFLICT(word) DO UPDATE SET cnt=cnt+1",
483*482dc645Sdrh           -1, &pInsert, 0);
484*482dc645Sdrh       if( rc ) fatal_error("Could not prepare the UPSERT statement: %s\n",
485*482dc645Sdrh                             sqlite3_errmsg(db));
486*482dc645Sdrh     }
487e2b45d17Sdrh     if( iMode2==MODE_DELETE ){
488ac873261Sdrh       rc = sqlite3_prepare_v2(db,
489ac873261Sdrh             "DELETE FROM wordcount WHERE word=?1",
490ac873261Sdrh             -1, &pDelete, 0);
491ac873261Sdrh       if( rc ) fatal_error("Could not prepare the DELETE statement: %s\n",
492ac873261Sdrh                            sqlite3_errmsg(db));
493ac873261Sdrh     }
4949ac3c1eaSdrh 
4959ac3c1eaSdrh     /* Process the input file */
4969ac3c1eaSdrh     while( fgets(zInput, sizeof(zInput), in) ){
4979ac3c1eaSdrh       for(i=0; zInput[i]; i++){
498c56fac74Sdrh         if( !ISALPHA(zInput[i]) ) continue;
499c56fac74Sdrh         for(j=i+1; ISALPHA(zInput[j]); j++){}
5009ac3c1eaSdrh 
5019ac3c1eaSdrh         /* Found a new word at zInput[i] that is j-i bytes long.
5029ac3c1eaSdrh         ** Process it into the wordcount table.  */
503e2b45d17Sdrh         if( iMode2==MODE_DELETE ){
504ac873261Sdrh           sqlite3_bind_text(pDelete, 1, zInput+i, j-i, SQLITE_STATIC);
505ac873261Sdrh           if( sqlite3_step(pDelete)!=SQLITE_DONE ){
506ac873261Sdrh             fatal_error("DELETE failed: %s\n", sqlite3_errmsg(db));
507ac873261Sdrh           }
508ac873261Sdrh           sqlite3_reset(pDelete);
509e2b45d17Sdrh         }else if( iMode2==MODE_SELECT ){
5109ac3c1eaSdrh           sqlite3_bind_text(pSelect, 1, zInput+i, j-i, SQLITE_STATIC);
5119ac3c1eaSdrh           rc = sqlite3_step(pSelect);
5129ac3c1eaSdrh           sqlite3_reset(pSelect);
5139ac3c1eaSdrh           if( rc==SQLITE_ROW ){
5149ac3c1eaSdrh             sqlite3_bind_text(pUpdate, 1, zInput+i, j-i, SQLITE_STATIC);
5159ac3c1eaSdrh             if( sqlite3_step(pUpdate)!=SQLITE_DONE ){
5169ac3c1eaSdrh               fatal_error("UPDATE failed: %s\n", sqlite3_errmsg(db));
5179ac3c1eaSdrh             }
5189ac3c1eaSdrh             sqlite3_reset(pUpdate);
5199ac3c1eaSdrh           }else if( rc==SQLITE_DONE ){
5209ac3c1eaSdrh             sqlite3_bind_text(pInsert, 1, zInput+i, j-i, SQLITE_STATIC);
5219ac3c1eaSdrh             if( sqlite3_step(pInsert)!=SQLITE_DONE ){
5229ac3c1eaSdrh               fatal_error("Insert failed: %s\n", sqlite3_errmsg(db));
5239ac3c1eaSdrh             }
5249ac3c1eaSdrh             sqlite3_reset(pInsert);
5259ac3c1eaSdrh           }else{
5269ac3c1eaSdrh             fatal_error("SELECT failed: %s\n", sqlite3_errmsg(db));
5279ac3c1eaSdrh           }
528e2b45d17Sdrh         }else if( iMode2==MODE_QUERY ){
529776f3a2fSdrh           sqlite3_bind_text(pSelect, 1, zInput+i, j-i, SQLITE_STATIC);
530776f3a2fSdrh           if( sqlite3_step(pSelect)==SQLITE_ROW ){
531776f3a2fSdrh             sumCnt += sqlite3_column_int64(pSelect, 0);
532776f3a2fSdrh           }
533776f3a2fSdrh           sqlite3_reset(pSelect);
5349ac3c1eaSdrh         }else{
5359ac3c1eaSdrh           sqlite3_bind_text(pInsert, 1, zInput+i, j-i, SQLITE_STATIC);
5369ac3c1eaSdrh           if( sqlite3_step(pInsert)!=SQLITE_DONE ){
5379ac3c1eaSdrh             fatal_error("INSERT failed: %s\n", sqlite3_errmsg(db));
5389ac3c1eaSdrh           }
5399ac3c1eaSdrh           sqlite3_reset(pInsert);
540e2b45d17Sdrh           if( iMode2==MODE_UPDATE
541e2b45d17Sdrh            || (iMode2==MODE_INSERT && sqlite3_changes(db)==0)
5429ac3c1eaSdrh           ){
5439ac3c1eaSdrh             sqlite3_bind_text(pUpdate, 1, zInput+i, j-i, SQLITE_STATIC);
5449ac3c1eaSdrh             if( sqlite3_step(pUpdate)!=SQLITE_DONE ){
5459ac3c1eaSdrh               fatal_error("UPDATE failed: %s\n", sqlite3_errmsg(db));
5469ac3c1eaSdrh             }
5479ac3c1eaSdrh             sqlite3_reset(pUpdate);
5489ac3c1eaSdrh           }
5499ac3c1eaSdrh         }
5509ac3c1eaSdrh         i = j-1;
551ac873261Sdrh 
552ac873261Sdrh         /* Increment the operation counter.  Do a COMMIT if it is time. */
553ac873261Sdrh         nOp++;
554ac873261Sdrh         if( commitInterval>0 && (nOp%commitInterval)==0 ){
555ac873261Sdrh           sqlite3_exec(db, "COMMIT; BEGIN IMMEDIATE", 0, 0, 0);
556ac873261Sdrh         }
5579ac3c1eaSdrh       }
5589ac3c1eaSdrh     }
5599ac3c1eaSdrh     sqlite3_exec(db, "COMMIT", 0, 0, 0);
560e2b45d17Sdrh     sqlite3_finalize(pInsert);  pInsert = 0;
561e2b45d17Sdrh     sqlite3_finalize(pUpdate);  pUpdate = 0;
562e2b45d17Sdrh     sqlite3_finalize(pSelect);  pSelect = 0;
563e2b45d17Sdrh     sqlite3_finalize(pDelete);  pDelete = 0;
5649ac3c1eaSdrh 
565e2b45d17Sdrh     if( iMode2==MODE_QUERY && iMode!=MODE_ALL ){
56619eb77bbSdrh       printf("%s sum of cnt: %lld\n", zTag, sumCnt);
567776f3a2fSdrh       rc = sqlite3_prepare_v2(db,"SELECT sum(cnt*cnt) FROM wordcount", -1,
568776f3a2fSdrh                               &pSelect, 0);
569776f3a2fSdrh       if( rc==SQLITE_OK && sqlite3_step(pSelect)==SQLITE_ROW ){
57019eb77bbSdrh         printf("%s double-check: %lld\n", zTag,sqlite3_column_int64(pSelect,0));
571776f3a2fSdrh       }
572776f3a2fSdrh       sqlite3_finalize(pSelect);
573776f3a2fSdrh     }
574776f3a2fSdrh 
575d2b637c2Sdrh 
576d2b637c2Sdrh     if( showTimer ){
577d2b637c2Sdrh       sqlite3_int64 elapseTime = realTime() - startTime;
578e2b45d17Sdrh       totalTime += elapseTime;
57919eb77bbSdrh       fprintf(pTimer, "%3d.%03d wordcount", (int)(elapseTime/1000),
5807a909855Sdrh                                    (int)(elapseTime%1000));
581e2b45d17Sdrh       if( iMode==MODE_ALL ){
582e2b45d17Sdrh         fprintf(pTimer, " %s%s\n", azMode[iMode2],
583e2b45d17Sdrh                 useWithoutRowid? " --without-rowid" : "");
584e2b45d17Sdrh       }else{
58519eb77bbSdrh         for(i=1; i<argc; i++) if( i!=showTimer ) fprintf(pTimer," %s",argv[i]);
58619eb77bbSdrh         fprintf(pTimer, "\n");
587d2b637c2Sdrh       }
588e2b45d17Sdrh     }
589d2b637c2Sdrh 
5900afa99e3Sdrh     if( showSummary ){
59124f1985aSdrh       sqlite3_create_function(db, "checksum", -1, SQLITE_UTF8, 0,
59224f1985aSdrh                               0, checksumStep, checksumFinalize);
5930afa99e3Sdrh       sqlite3_exec(db,
59424f1985aSdrh         "SELECT 'count(*):  ', count(*) FROM wordcount;\n"
59524f1985aSdrh         "SELECT 'sum(cnt):  ', sum(cnt) FROM wordcount;\n"
59624f1985aSdrh         "SELECT 'max(cnt):  ', max(cnt) FROM wordcount;\n"
59724f1985aSdrh         "SELECT 'avg(cnt):  ', avg(cnt) FROM wordcount;\n"
59824f1985aSdrh         "SELECT 'sum(cnt=1):', sum(cnt=1) FROM wordcount;\n"
59924f1985aSdrh         "SELECT 'top 10:    ', group_concat(word, ', ') FROM "
600aedfc507Sdrh            "(SELECT word FROM wordcount ORDER BY cnt DESC, word LIMIT 10);\n"
60124f1985aSdrh         "SELECT 'checksum:  ', checksum(word, cnt) FROM "
60224f1985aSdrh            "(SELECT word, cnt FROM wordcount ORDER BY word);\n"
60324f1985aSdrh         "PRAGMA integrity_check;\n",
6040afa99e3Sdrh         printResult, 0, 0);
6050afa99e3Sdrh     }
606e2b45d17Sdrh   } /* End the --all loop */
607e2b45d17Sdrh 
608e2b45d17Sdrh   /* Close the input file after the last read */
609e2b45d17Sdrh   if( zFileToRead ) fclose(in);
610e2b45d17Sdrh 
611e2b45d17Sdrh   /* In --all mode, so the total time */
612e2b45d17Sdrh   if( iMode==MODE_ALL && showTimer ){
613e2b45d17Sdrh     fprintf(pTimer, "%3d.%03d wordcount --all\n", (int)(totalTime/1000),
614e2b45d17Sdrh                                    (int)(totalTime%1000));
615e2b45d17Sdrh   }
6160afa99e3Sdrh 
6179ac3c1eaSdrh   /* Database connection statistics printed after both prepared statements
6189ac3c1eaSdrh   ** have been finalized */
6190afa99e3Sdrh   if( showStats ){
6209ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_USED, &iCur, &iHiwtr, 0);
62119eb77bbSdrh     printf("%s Lookaside Slots Used:        %d (max %d)\n", zTag, iCur,iHiwtr);
6229ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_HIT, &iCur, &iHiwtr, 0);
62319eb77bbSdrh     printf("%s Successful lookasides:       %d\n", zTag, iHiwtr);
6249ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE, &iCur,&iHiwtr,0);
62519eb77bbSdrh     printf("%s Lookaside size faults:       %d\n", zTag, iHiwtr);
6269ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL, &iCur,&iHiwtr,0);
62719eb77bbSdrh     printf("%s Lookaside OOM faults:        %d\n", zTag, iHiwtr);
6289ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_USED, &iCur, &iHiwtr, 0);
62919eb77bbSdrh     printf("%s Pager Heap Usage:            %d bytes\n", zTag, iCur);
6309ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_HIT, &iCur, &iHiwtr, 1);
63119eb77bbSdrh     printf("%s Page cache hits:             %d\n", zTag, iCur);
6329ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_MISS, &iCur, &iHiwtr, 1);
63319eb77bbSdrh     printf("%s Page cache misses:           %d\n", zTag, iCur);
6349ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_WRITE, &iCur, &iHiwtr, 1);
63519eb77bbSdrh     printf("%s Page cache writes:           %d\n", zTag, iCur);
6369ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_SCHEMA_USED, &iCur, &iHiwtr, 0);
63719eb77bbSdrh     printf("%s Schema Heap Usage:           %d bytes\n", zTag, iCur);
6389ac3c1eaSdrh     sqlite3_db_status(db, SQLITE_DBSTATUS_STMT_USED, &iCur, &iHiwtr, 0);
63919eb77bbSdrh     printf("%s Statement Heap Usage:        %d bytes\n", zTag, iCur);
6400afa99e3Sdrh   }
6419ac3c1eaSdrh 
6429ac3c1eaSdrh   sqlite3_close(db);
6439ac3c1eaSdrh 
6449ac3c1eaSdrh   /* Global memory usage statistics printed after the database connection
6459ac3c1eaSdrh   ** has closed.  Memory usage should be zero at this point. */
6460afa99e3Sdrh   if( showStats ){
6479ac3c1eaSdrh     sqlite3_status(SQLITE_STATUS_MEMORY_USED, &iCur, &iHiwtr, 0);
64819eb77bbSdrh     printf("%s Memory Used (bytes):         %d (max %d)\n", zTag,iCur,iHiwtr);
6499ac3c1eaSdrh     sqlite3_status(SQLITE_STATUS_MALLOC_COUNT, &iCur, &iHiwtr, 0);
65019eb77bbSdrh     printf("%s Outstanding Allocations:     %d (max %d)\n",zTag,iCur,iHiwtr);
6519ac3c1eaSdrh     sqlite3_status(SQLITE_STATUS_PAGECACHE_OVERFLOW, &iCur, &iHiwtr, 0);
65219eb77bbSdrh     printf("%s Pcache Overflow Bytes:       %d (max %d)\n",zTag,iCur,iHiwtr);
6539ac3c1eaSdrh     sqlite3_status(SQLITE_STATUS_MALLOC_SIZE, &iCur, &iHiwtr, 0);
65419eb77bbSdrh     printf("%s Largest Allocation:          %d bytes\n",zTag,iHiwtr);
6559ac3c1eaSdrh     sqlite3_status(SQLITE_STATUS_PAGECACHE_SIZE, &iCur, &iHiwtr, 0);
65619eb77bbSdrh     printf("%s Largest Pcache Allocation:   %d bytes\n",zTag,iHiwtr);
6570afa99e3Sdrh   }
6589ac3c1eaSdrh   return 0;
6599ac3c1eaSdrh }
660