19ac3c1eaSdrh /*
29ac3c1eaSdrh ** This C program extracts all "words" from an input document and adds them
39ac3c1eaSdrh ** to an SQLite database. A "word" is any contiguous sequence of alphabetic
49ac3c1eaSdrh ** characters. All digits, punctuation, and whitespace characters are
59ac3c1eaSdrh ** word separators. The database stores a single entry for each distinct
69ac3c1eaSdrh ** word together with a count of the number of occurrences of that word.
79ac3c1eaSdrh ** A fresh database is created automatically on each run.
89ac3c1eaSdrh **
99ac3c1eaSdrh ** wordcount DATABASE INPUTFILE
109ac3c1eaSdrh **
119ac3c1eaSdrh ** The INPUTFILE name can be omitted, in which case input it taken from
129ac3c1eaSdrh ** standard input.
139ac3c1eaSdrh **
149ac3c1eaSdrh ** Option:
159ac3c1eaSdrh **
169ac3c1eaSdrh **
179ac3c1eaSdrh ** Modes:
189ac3c1eaSdrh **
199ac3c1eaSdrh ** Insert mode means:
209ac3c1eaSdrh ** (1) INSERT OR IGNORE INTO wordcount VALUES($new,1)
219ac3c1eaSdrh ** (2) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new -- if (1) is a noop
229ac3c1eaSdrh **
239ac3c1eaSdrh ** Update mode means:
249ac3c1eaSdrh ** (1) INSERT OR IGNORE INTO wordcount VALUES($new,0)
259ac3c1eaSdrh ** (2) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new
269ac3c1eaSdrh **
279ac3c1eaSdrh ** Replace mode means:
289ac3c1eaSdrh ** (1) REPLACE INTO wordcount
299ac3c1eaSdrh ** VALUES($new,ifnull((SELECT cnt FROM wordcount WHERE word=$new),0)+1);
309ac3c1eaSdrh **
31*482dc645Sdrh ** Upsert mode means:
32*482dc645Sdrh ** (1) INSERT INTO wordcount VALUES($new,1)
33*482dc645Sdrh ** ON CONFLICT(word) DO UPDATE SET cnt=cnt+1
34*482dc645Sdrh **
35ac873261Sdrh ** Select mode means:
36ac873261Sdrh ** (1) SELECT 1 FROM wordcount WHERE word=$new
379ac3c1eaSdrh ** (2) INSERT INTO wordcount VALUES($new,1) -- if (1) returns nothing
389ac3c1eaSdrh ** (3) UPDATE wordcount SET cnt=cnt+1 WHERE word=$new --if (1) return TRUE
399ac3c1eaSdrh **
40ac873261Sdrh ** Delete mode means:
41ac873261Sdrh ** (1) DELETE FROM wordcount WHERE word=$new
42ac873261Sdrh **
43776f3a2fSdrh ** Query mode means:
44776f3a2fSdrh ** (1) SELECT cnt FROM wordcount WHERE word=$new
45776f3a2fSdrh **
46776f3a2fSdrh ** Note that delete mode and query mode are only useful for preexisting
47776f3a2fSdrh ** databases. The wordcount table is created using IF NOT EXISTS so this
48776f3a2fSdrh ** utility can be run multiple times on the same database file. The
49776f3a2fSdrh ** --without-rowid, --nocase, and --pagesize parameters are only effective
50776f3a2fSdrh ** when creating a new database and are harmless no-ops on preexisting
51776f3a2fSdrh ** databases.
52ac873261Sdrh **
539ac3c1eaSdrh ******************************************************************************
549ac3c1eaSdrh **
559ac3c1eaSdrh ** Compile as follows:
569ac3c1eaSdrh **
579ac3c1eaSdrh ** gcc -I. wordcount.c sqlite3.c -ldl -lpthreads
589ac3c1eaSdrh **
599ac3c1eaSdrh ** Or:
609ac3c1eaSdrh **
619ac3c1eaSdrh ** gcc -I. -DSQLITE_THREADSAFE=0 -DSQLITE_OMIT_LOAD_EXTENSION \
629ac3c1eaSdrh ** wordcount.c sqlite3.c
639ac3c1eaSdrh */
649ac3c1eaSdrh #include <stdio.h>
659ac3c1eaSdrh #include <string.h>
669ac3c1eaSdrh #include <ctype.h>
679ac3c1eaSdrh #include <stdlib.h>
689ac3c1eaSdrh #include <stdarg.h>
699ac3c1eaSdrh #include "sqlite3.h"
70e2b45d17Sdrh #ifndef _WIN32
71e2b45d17Sdrh # include <unistd.h>
72e2b45d17Sdrh #else
73e2b45d17Sdrh # include <io.h>
74e2b45d17Sdrh #endif
75c56fac74Sdrh #define ISALPHA(X) isalpha((unsigned char)(X))
769ac3c1eaSdrh
77e2b45d17Sdrh const char zHelp[] =
78e2b45d17Sdrh "Usage: wordcount [OPTIONS] DATABASE [INPUT]\n"
79e2b45d17Sdrh " --all Repeat the test for all test modes\n"
80e2b45d17Sdrh " --cachesize NNN Use a cache size of NNN\n"
81e2b45d17Sdrh " --commit NNN Commit after every NNN operations\n"
82e2b45d17Sdrh " --delete Use DELETE mode\n"
83e2b45d17Sdrh " --insert Use INSERT mode (the default)\n"
84e2b45d17Sdrh " --journal MMMM Use PRAGMA journal_mode=MMMM\n"
85e2b45d17Sdrh " --nocase Add the NOCASE collating sequence to the words.\n"
86e2b45d17Sdrh " --nosync Use PRAGMA synchronous=OFF\n"
87e2b45d17Sdrh " --pagesize NNN Use a page size of NNN\n"
88e2b45d17Sdrh " --query Use QUERY mode\n"
89e2b45d17Sdrh " --replace Use REPLACE mode\n"
90e2b45d17Sdrh " --select Use SELECT mode\n"
91e2b45d17Sdrh " --stats Show sqlite3_status() results at the end.\n"
92e2b45d17Sdrh " --summary Show summary information on the collected data.\n"
93e2b45d17Sdrh " --tag NAME Tag all output using NAME. Use only stdout.\n"
94e2b45d17Sdrh " --timer Time the operation of this program\n"
95e2b45d17Sdrh " --trace Enable sqlite3_trace() output.\n"
96e2b45d17Sdrh " --update Use UPDATE mode\n"
97*482dc645Sdrh " --upsert Use UPSERT mode\n"
98e2b45d17Sdrh " --without-rowid Use a WITHOUT ROWID table to store the words.\n"
99e2b45d17Sdrh ;
100e2b45d17Sdrh
10119eb77bbSdrh /* Output tag */
10219eb77bbSdrh char *zTag = "--";
10319eb77bbSdrh
104d2b637c2Sdrh /* Return the current wall-clock time */
realTime(void)105d2b637c2Sdrh static sqlite3_int64 realTime(void){
106d2b637c2Sdrh static sqlite3_vfs *clockVfs = 0;
107d2b637c2Sdrh sqlite3_int64 t;
108d2b637c2Sdrh if( clockVfs==0 ) clockVfs = sqlite3_vfs_find(0);
109d2b637c2Sdrh if( clockVfs->iVersion>=1 && clockVfs->xCurrentTimeInt64!=0 ){
110d2b637c2Sdrh clockVfs->xCurrentTimeInt64(clockVfs, &t);
111d2b637c2Sdrh }else{
112d2b637c2Sdrh double r;
113d2b637c2Sdrh clockVfs->xCurrentTime(clockVfs, &r);
114d2b637c2Sdrh t = (sqlite3_int64)(r*86400000.0);
115d2b637c2Sdrh }
116d2b637c2Sdrh return t;
117d2b637c2Sdrh }
118d2b637c2Sdrh
1199ac3c1eaSdrh /* Print an error message and exit */
fatal_error(const char * zMsg,...)1209ac3c1eaSdrh static void fatal_error(const char *zMsg, ...){
1219ac3c1eaSdrh va_list ap;
1229ac3c1eaSdrh va_start(ap, zMsg);
1239ac3c1eaSdrh vfprintf(stderr, zMsg, ap);
1249ac3c1eaSdrh va_end(ap);
1259ac3c1eaSdrh exit(1);
1269ac3c1eaSdrh }
1279ac3c1eaSdrh
128e2b45d17Sdrh /* Print a usage message and quit */
usage(void)129e2b45d17Sdrh static void usage(void){
130e2b45d17Sdrh printf("%s",zHelp);
131e2b45d17Sdrh exit(0);
132e2b45d17Sdrh }
133e2b45d17Sdrh
1349ac3c1eaSdrh /* The sqlite3_trace() callback function */
traceCallback(void * NotUsed,const char * zSql)1359ac3c1eaSdrh static void traceCallback(void *NotUsed, const char *zSql){
1369ac3c1eaSdrh printf("%s;\n", zSql);
1379ac3c1eaSdrh }
1389ac3c1eaSdrh
1390afa99e3Sdrh /* An sqlite3_exec() callback that prints results on standard output,
1400afa99e3Sdrh ** each column separated by a single space. */
printResult(void * NotUsed,int nArg,char ** azArg,char ** azNm)1410afa99e3Sdrh static int printResult(void *NotUsed, int nArg, char **azArg, char **azNm){
1420afa99e3Sdrh int i;
14319eb77bbSdrh printf("%s", zTag);
1440afa99e3Sdrh for(i=0; i<nArg; i++){
14555fcab39Sdrh printf(" %s", azArg[i] ? azArg[i] : "(null)");
1460afa99e3Sdrh }
1470afa99e3Sdrh printf("\n");
1480afa99e3Sdrh return 0;
1490afa99e3Sdrh }
1500afa99e3Sdrh
1510afa99e3Sdrh
15224f1985aSdrh /*
15324f1985aSdrh ** Add one character to a hash
15424f1985aSdrh */
addCharToHash(unsigned int * a,unsigned char x)15524f1985aSdrh static void addCharToHash(unsigned int *a, unsigned char x){
15624f1985aSdrh if( a[0]<4 ){
15724f1985aSdrh a[1] = (a[1]<<8) | x;
15824f1985aSdrh a[0]++;
15924f1985aSdrh }else{
16024f1985aSdrh a[2] = (a[2]<<8) | x;
16124f1985aSdrh a[0]++;
16224f1985aSdrh if( a[0]==8 ){
16324f1985aSdrh a[3] += a[1] + a[4];
16424f1985aSdrh a[4] += a[2] + a[3];
16524f1985aSdrh a[0] = a[1] = a[2] = 0;
16624f1985aSdrh }
16724f1985aSdrh }
16824f1985aSdrh }
16924f1985aSdrh
17024f1985aSdrh /*
17124f1985aSdrh ** Compute the final hash value.
17224f1985aSdrh */
finalHash(unsigned int * a,char * z)17324f1985aSdrh static void finalHash(unsigned int *a, char *z){
17424f1985aSdrh a[3] += a[1] + a[4] + a[0];
17524f1985aSdrh a[4] += a[2] + a[3];
17624f1985aSdrh sqlite3_snprintf(17, z, "%08x%08x", a[3], a[4]);
17724f1985aSdrh }
17824f1985aSdrh
17924f1985aSdrh
18024f1985aSdrh /*
18124f1985aSdrh ** Implementation of a checksum() aggregate SQL function
18224f1985aSdrh */
checksumStep(sqlite3_context * context,int argc,sqlite3_value ** argv)18324f1985aSdrh static void checksumStep(
18424f1985aSdrh sqlite3_context *context,
18524f1985aSdrh int argc,
18624f1985aSdrh sqlite3_value **argv
18724f1985aSdrh ){
18824f1985aSdrh const unsigned char *zVal;
18924f1985aSdrh int nVal, i, j;
19024f1985aSdrh unsigned int *a;
19124f1985aSdrh a = (unsigned*)sqlite3_aggregate_context(context, sizeof(unsigned int)*5);
19224f1985aSdrh
19324f1985aSdrh if( a ){
19424f1985aSdrh for(i=0; i<argc; i++){
19524f1985aSdrh nVal = sqlite3_value_bytes(argv[i]);
19624f1985aSdrh zVal = (const unsigned char*)sqlite3_value_text(argv[i]);
19724f1985aSdrh if( zVal ) for(j=0; j<nVal; j++) addCharToHash(a, zVal[j]);
19824f1985aSdrh addCharToHash(a, '|');
19924f1985aSdrh }
20024f1985aSdrh addCharToHash(a, '\n');
20124f1985aSdrh }
20224f1985aSdrh }
checksumFinalize(sqlite3_context * context)20324f1985aSdrh static void checksumFinalize(sqlite3_context *context){
20424f1985aSdrh unsigned int *a;
20524f1985aSdrh char zResult[24];
20624f1985aSdrh a = sqlite3_aggregate_context(context, 0);
20724f1985aSdrh if( a ){
20824f1985aSdrh finalHash(a, zResult);
20924f1985aSdrh sqlite3_result_text(context, zResult, -1, SQLITE_TRANSIENT);
21024f1985aSdrh }
21124f1985aSdrh }
21224f1985aSdrh
2139ac3c1eaSdrh /* Define operating modes */
2149ac3c1eaSdrh #define MODE_INSERT 0
2159ac3c1eaSdrh #define MODE_REPLACE 1
216*482dc645Sdrh #define MODE_UPSERT 2
217*482dc645Sdrh #define MODE_SELECT 3
218*482dc645Sdrh #define MODE_UPDATE 4
219*482dc645Sdrh #define MODE_DELETE 5
220*482dc645Sdrh #define MODE_QUERY 6
221*482dc645Sdrh #define MODE_COUNT 7
222e2b45d17Sdrh #define MODE_ALL (-1)
223e2b45d17Sdrh
224e2b45d17Sdrh /* Mode names */
225e2b45d17Sdrh static const char *azMode[] = {
226e2b45d17Sdrh "--insert",
227e2b45d17Sdrh "--replace",
228*482dc645Sdrh "--upsert",
229e2b45d17Sdrh "--select",
230e2b45d17Sdrh "--update",
231e2b45d17Sdrh "--delete",
232e2b45d17Sdrh "--query"
233e2b45d17Sdrh };
234e2b45d17Sdrh
235e2b45d17Sdrh /*
236e2b45d17Sdrh ** Determine if another iteration of the test is required. Return true
237e2b45d17Sdrh ** if so. Return zero if all iterations have finished.
238e2b45d17Sdrh */
allLoop(int iMode,int * piLoopCnt,int * piMode2,int * pUseWithoutRowid)239e2b45d17Sdrh static int allLoop(
240e2b45d17Sdrh int iMode, /* The selected test mode */
241e2b45d17Sdrh int *piLoopCnt, /* Iteration loop counter */
242e2b45d17Sdrh int *piMode2, /* The test mode to use on the next iteration */
243e2b45d17Sdrh int *pUseWithoutRowid /* Whether or not to use --without-rowid */
244e2b45d17Sdrh ){
245e2b45d17Sdrh int i;
246e2b45d17Sdrh if( iMode!=MODE_ALL ){
247e2b45d17Sdrh if( *piLoopCnt ) return 0;
248e2b45d17Sdrh *piMode2 = iMode;
249e2b45d17Sdrh *piLoopCnt = 1;
250e2b45d17Sdrh return 1;
251e2b45d17Sdrh }
252e2b45d17Sdrh if( (*piLoopCnt)>=MODE_COUNT*2 ) return 0;
253e2b45d17Sdrh i = (*piLoopCnt)++;
254e2b45d17Sdrh *pUseWithoutRowid = i&1;
255e2b45d17Sdrh *piMode2 = i>>1;
256e2b45d17Sdrh return 1;
257e2b45d17Sdrh }
2589ac3c1eaSdrh
main(int argc,char ** argv)2599ac3c1eaSdrh int main(int argc, char **argv){
2609ac3c1eaSdrh const char *zFileToRead = 0; /* Input file. NULL for stdin */
2619ac3c1eaSdrh const char *zDbName = 0; /* Name of the database file to create */
2629ac3c1eaSdrh int useWithoutRowid = 0; /* True for --without-rowid */
2639ac3c1eaSdrh int iMode = MODE_INSERT; /* One of MODE_xxxxx */
264e2b45d17Sdrh int iMode2; /* Mode to use for current --all iteration */
265e2b45d17Sdrh int iLoopCnt = 0; /* Which iteration when running --all */
2669ac3c1eaSdrh int useNocase = 0; /* True for --nocase */
2679ac3c1eaSdrh int doTrace = 0; /* True for --trace */
2680afa99e3Sdrh int showStats = 0; /* True for --stats */
2690afa99e3Sdrh int showSummary = 0; /* True for --summary */
270d2b637c2Sdrh int showTimer = 0; /* True for --timer */
271ac873261Sdrh int cacheSize = 0; /* Desired cache size. 0 means default */
272ac873261Sdrh int pageSize = 0; /* Desired page size. 0 means default */
273ac873261Sdrh int commitInterval = 0; /* How often to commit. 0 means never */
274ac873261Sdrh int noSync = 0; /* True for --nosync */
275ac873261Sdrh const char *zJMode = 0; /* Journal mode */
276ac873261Sdrh int nOp = 0; /* Operation counter */
2779ac3c1eaSdrh int i, j; /* Loop counters */
2789ac3c1eaSdrh sqlite3 *db; /* The SQLite database connection */
2799ac3c1eaSdrh char *zSql; /* Constructed SQL statement */
2809ac3c1eaSdrh sqlite3_stmt *pInsert = 0; /* The INSERT statement */
2819ac3c1eaSdrh sqlite3_stmt *pUpdate = 0; /* The UPDATE statement */
2829ac3c1eaSdrh sqlite3_stmt *pSelect = 0; /* The SELECT statement */
283ac873261Sdrh sqlite3_stmt *pDelete = 0; /* The DELETE statement */
2849ac3c1eaSdrh FILE *in; /* The open input file */
2859ac3c1eaSdrh int rc; /* Return code from an SQLite interface */
2869ac3c1eaSdrh int iCur, iHiwtr; /* Statistics values, current and "highwater" */
28719eb77bbSdrh FILE *pTimer = stderr; /* Output channel for the timer */
288776f3a2fSdrh sqlite3_int64 sumCnt = 0; /* Sum in QUERY mode */
289e2b45d17Sdrh sqlite3_int64 startTime; /* Time of start */
290e2b45d17Sdrh sqlite3_int64 totalTime = 0; /* Total time */
2919ac3c1eaSdrh char zInput[2000]; /* A single line of input */
2929ac3c1eaSdrh
2939ac3c1eaSdrh /* Process command-line arguments */
2949ac3c1eaSdrh for(i=1; i<argc; i++){
2959ac3c1eaSdrh const char *z = argv[i];
2969ac3c1eaSdrh if( z[0]=='-' ){
2979ac3c1eaSdrh do{ z++; }while( z[0]=='-' );
2989ac3c1eaSdrh if( strcmp(z,"without-rowid")==0 ){
2999ac3c1eaSdrh useWithoutRowid = 1;
3009ac3c1eaSdrh }else if( strcmp(z,"replace")==0 ){
3019ac3c1eaSdrh iMode = MODE_REPLACE;
302*482dc645Sdrh }else if( strcmp(z,"upsert")==0 ){
303*482dc645Sdrh iMode = MODE_UPSERT;
3049ac3c1eaSdrh }else if( strcmp(z,"select")==0 ){
3059ac3c1eaSdrh iMode = MODE_SELECT;
3069ac3c1eaSdrh }else if( strcmp(z,"insert")==0 ){
3079ac3c1eaSdrh iMode = MODE_INSERT;
3089ac3c1eaSdrh }else if( strcmp(z,"update")==0 ){
3099ac3c1eaSdrh iMode = MODE_UPDATE;
310ac873261Sdrh }else if( strcmp(z,"delete")==0 ){
311ac873261Sdrh iMode = MODE_DELETE;
312776f3a2fSdrh }else if( strcmp(z,"query")==0 ){
313776f3a2fSdrh iMode = MODE_QUERY;
314e2b45d17Sdrh }else if( strcmp(z,"all")==0 ){
315e2b45d17Sdrh iMode = MODE_ALL;
316e2b45d17Sdrh showTimer = -99;
3179ac3c1eaSdrh }else if( strcmp(z,"nocase")==0 ){
3189ac3c1eaSdrh useNocase = 1;
3199ac3c1eaSdrh }else if( strcmp(z,"trace")==0 ){
3209ac3c1eaSdrh doTrace = 1;
321ac873261Sdrh }else if( strcmp(z,"nosync")==0 ){
322ac873261Sdrh noSync = 1;
3230afa99e3Sdrh }else if( strcmp(z,"stats")==0 ){
3240afa99e3Sdrh showStats = 1;
3250afa99e3Sdrh }else if( strcmp(z,"summary")==0 ){
3260afa99e3Sdrh showSummary = 1;
327d2b637c2Sdrh }else if( strcmp(z,"timer")==0 ){
328d2b637c2Sdrh showTimer = i;
329ac873261Sdrh }else if( strcmp(z,"cachesize")==0 && i<argc-1 ){
330ac873261Sdrh i++;
331ac873261Sdrh cacheSize = atoi(argv[i]);
332ac873261Sdrh }else if( strcmp(z,"pagesize")==0 && i<argc-1 ){
333ac873261Sdrh i++;
334ac873261Sdrh pageSize = atoi(argv[i]);
335ac873261Sdrh }else if( strcmp(z,"commit")==0 && i<argc-1 ){
336ac873261Sdrh i++;
337ac873261Sdrh commitInterval = atoi(argv[i]);
338ac873261Sdrh }else if( strcmp(z,"journal")==0 && i<argc-1 ){
339ac873261Sdrh zJMode = argv[++i];
34019eb77bbSdrh }else if( strcmp(z,"tag")==0 && i<argc-1 ){
34119eb77bbSdrh zTag = argv[++i];
34219eb77bbSdrh pTimer = stdout;
343e2b45d17Sdrh }else if( strcmp(z, "help")==0 || strcmp(z,"?")==0 ){
344e2b45d17Sdrh usage();
3459ac3c1eaSdrh }else{
346e2b45d17Sdrh fatal_error("unknown option: \"%s\"\n"
347e2b45d17Sdrh "Use --help for a list of options\n",
348e2b45d17Sdrh argv[i]);
3499ac3c1eaSdrh }
3509ac3c1eaSdrh }else if( zDbName==0 ){
3519ac3c1eaSdrh zDbName = argv[i];
3529ac3c1eaSdrh }else if( zFileToRead==0 ){
3539ac3c1eaSdrh zFileToRead = argv[i];
3549ac3c1eaSdrh }else{
355e2b45d17Sdrh fatal_error("surplus argument: \"%s\"\n", argv[i]);
3569ac3c1eaSdrh }
3579ac3c1eaSdrh }
3589ac3c1eaSdrh if( zDbName==0 ){
359e2b45d17Sdrh usage();
3609ac3c1eaSdrh }
361d2b637c2Sdrh startTime = realTime();
3629ac3c1eaSdrh
3639ac3c1eaSdrh /* Open the database and the input file */
364e2b45d17Sdrh if( zDbName[0] && strcmp(zDbName,":memory:")!=0 ){
365e2b45d17Sdrh unlink(zDbName);
366e2b45d17Sdrh }
3679ac3c1eaSdrh if( sqlite3_open(zDbName, &db) ){
3689ac3c1eaSdrh fatal_error("Cannot open database file: %s\n", zDbName);
3699ac3c1eaSdrh }
3709ac3c1eaSdrh if( zFileToRead ){
3719ac3c1eaSdrh in = fopen(zFileToRead, "rb");
3729ac3c1eaSdrh if( in==0 ){
3739ac3c1eaSdrh fatal_error("Could not open input file \"%s\"\n", zFileToRead);
3749ac3c1eaSdrh }
3759ac3c1eaSdrh }else{
376e2b45d17Sdrh if( iMode==MODE_ALL ){
377e2b45d17Sdrh fatal_error("The --all mode cannot be used with stdin\n");
378e2b45d17Sdrh }
3799ac3c1eaSdrh in = stdin;
3809ac3c1eaSdrh }
3819ac3c1eaSdrh
382ac873261Sdrh /* Set database connection options */
3839ac3c1eaSdrh if( doTrace ) sqlite3_trace(db, traceCallback, 0);
384ac873261Sdrh if( pageSize ){
385ac873261Sdrh zSql = sqlite3_mprintf("PRAGMA page_size=%d", pageSize);
386ac873261Sdrh sqlite3_exec(db, zSql, 0, 0, 0);
387ac873261Sdrh sqlite3_free(zSql);
388ac873261Sdrh }
389ac873261Sdrh if( cacheSize ){
390ac873261Sdrh zSql = sqlite3_mprintf("PRAGMA cache_size=%d", cacheSize);
391ac873261Sdrh sqlite3_exec(db, zSql, 0, 0, 0);
392ac873261Sdrh sqlite3_free(zSql);
393ac873261Sdrh }
394ac873261Sdrh if( noSync ) sqlite3_exec(db, "PRAGMA synchronous=OFF", 0, 0, 0);
395ac873261Sdrh if( zJMode ){
396ac873261Sdrh zSql = sqlite3_mprintf("PRAGMA journal_mode=%s", zJMode);
397ac873261Sdrh sqlite3_exec(db, zSql, 0, 0, 0);
398ac873261Sdrh sqlite3_free(zSql);
399ac873261Sdrh }
400ac873261Sdrh
401e2b45d17Sdrh iLoopCnt = 0;
402e2b45d17Sdrh while( allLoop(iMode, &iLoopCnt, &iMode2, &useWithoutRowid) ){
403e2b45d17Sdrh /* Delete prior content in --all mode */
404e2b45d17Sdrh if( iMode==MODE_ALL ){
405e2b45d17Sdrh if( sqlite3_exec(db, "DROP TABLE IF EXISTS wordcount; VACUUM;",0,0,0) ){
406e2b45d17Sdrh fatal_error("Could not clean up prior iteration\n");
407e2b45d17Sdrh }
408e2b45d17Sdrh startTime = realTime();
409e2b45d17Sdrh rewind(in);
410e2b45d17Sdrh }
411ac873261Sdrh
412ac873261Sdrh /* Construct the "wordcount" table into which to put the words */
4139ac3c1eaSdrh if( sqlite3_exec(db, "BEGIN IMMEDIATE", 0, 0, 0) ){
4149ac3c1eaSdrh fatal_error("Could not start a transaction\n");
4159ac3c1eaSdrh }
4169ac3c1eaSdrh zSql = sqlite3_mprintf(
417ac873261Sdrh "CREATE TABLE IF NOT EXISTS wordcount(\n"
4189ac3c1eaSdrh " word TEXT PRIMARY KEY COLLATE %s,\n"
4199ac3c1eaSdrh " cnt INTEGER\n"
4209ac3c1eaSdrh ")%s",
4219ac3c1eaSdrh useNocase ? "nocase" : "binary",
4229ac3c1eaSdrh useWithoutRowid ? " WITHOUT ROWID" : ""
4239ac3c1eaSdrh );
4249ac3c1eaSdrh if( zSql==0 ) fatal_error("out of memory\n");
4259ac3c1eaSdrh rc = sqlite3_exec(db, zSql, 0, 0, 0);
4269ac3c1eaSdrh if( rc ) fatal_error("Could not create the wordcount table: %s.\n",
4279ac3c1eaSdrh sqlite3_errmsg(db));
4289ac3c1eaSdrh sqlite3_free(zSql);
4299ac3c1eaSdrh
4309ac3c1eaSdrh /* Prepare SQL statements that will be needed */
431e2b45d17Sdrh if( iMode2==MODE_QUERY ){
432776f3a2fSdrh rc = sqlite3_prepare_v2(db,
433776f3a2fSdrh "SELECT cnt FROM wordcount WHERE word=?1",
434776f3a2fSdrh -1, &pSelect, 0);
435776f3a2fSdrh if( rc ) fatal_error("Could not prepare the SELECT statement: %s\n",
436776f3a2fSdrh sqlite3_errmsg(db));
437776f3a2fSdrh }
438e2b45d17Sdrh if( iMode2==MODE_SELECT ){
4399ac3c1eaSdrh rc = sqlite3_prepare_v2(db,
4409ac3c1eaSdrh "SELECT 1 FROM wordcount WHERE word=?1",
4419ac3c1eaSdrh -1, &pSelect, 0);
4429ac3c1eaSdrh if( rc ) fatal_error("Could not prepare the SELECT statement: %s\n",
4439ac3c1eaSdrh sqlite3_errmsg(db));
4449ac3c1eaSdrh rc = sqlite3_prepare_v2(db,
4459ac3c1eaSdrh "INSERT INTO wordcount(word,cnt) VALUES(?1,1)",
4469ac3c1eaSdrh -1, &pInsert, 0);
4479ac3c1eaSdrh if( rc ) fatal_error("Could not prepare the INSERT statement: %s\n",
4489ac3c1eaSdrh sqlite3_errmsg(db));
4499ac3c1eaSdrh }
450e2b45d17Sdrh if( iMode2==MODE_SELECT || iMode2==MODE_UPDATE || iMode2==MODE_INSERT ){
4519ac3c1eaSdrh rc = sqlite3_prepare_v2(db,
4529ac3c1eaSdrh "UPDATE wordcount SET cnt=cnt+1 WHERE word=?1",
4539ac3c1eaSdrh -1, &pUpdate, 0);
4549ac3c1eaSdrh if( rc ) fatal_error("Could not prepare the UPDATE statement: %s\n",
4559ac3c1eaSdrh sqlite3_errmsg(db));
4569ac3c1eaSdrh }
457e2b45d17Sdrh if( iMode2==MODE_INSERT ){
4589ac3c1eaSdrh rc = sqlite3_prepare_v2(db,
4599ac3c1eaSdrh "INSERT OR IGNORE INTO wordcount(word,cnt) VALUES(?1,1)",
4609ac3c1eaSdrh -1, &pInsert, 0);
4619ac3c1eaSdrh if( rc ) fatal_error("Could not prepare the INSERT statement: %s\n",
4629ac3c1eaSdrh sqlite3_errmsg(db));
4639ac3c1eaSdrh }
464e2b45d17Sdrh if( iMode2==MODE_UPDATE ){
4659ac3c1eaSdrh rc = sqlite3_prepare_v2(db,
4669ac3c1eaSdrh "INSERT OR IGNORE INTO wordcount(word,cnt) VALUES(?1,0)",
4679ac3c1eaSdrh -1, &pInsert, 0);
4689ac3c1eaSdrh if( rc ) fatal_error("Could not prepare the INSERT statement: %s\n",
4699ac3c1eaSdrh sqlite3_errmsg(db));
4709ac3c1eaSdrh }
471e2b45d17Sdrh if( iMode2==MODE_REPLACE ){
4729ac3c1eaSdrh rc = sqlite3_prepare_v2(db,
4739ac3c1eaSdrh "REPLACE INTO wordcount(word,cnt)"
4749ac3c1eaSdrh "VALUES(?1,coalesce((SELECT cnt FROM wordcount WHERE word=?1),0)+1)",
4759ac3c1eaSdrh -1, &pInsert, 0);
4769ac3c1eaSdrh if( rc ) fatal_error("Could not prepare the REPLACE statement: %s\n",
4779ac3c1eaSdrh sqlite3_errmsg(db));
4789ac3c1eaSdrh }
479*482dc645Sdrh if( iMode2==MODE_UPSERT ){
480*482dc645Sdrh rc = sqlite3_prepare_v2(db,
481*482dc645Sdrh "INSERT INTO wordcount(word,cnt) VALUES(?1,1) "
482*482dc645Sdrh "ON CONFLICT(word) DO UPDATE SET cnt=cnt+1",
483*482dc645Sdrh -1, &pInsert, 0);
484*482dc645Sdrh if( rc ) fatal_error("Could not prepare the UPSERT statement: %s\n",
485*482dc645Sdrh sqlite3_errmsg(db));
486*482dc645Sdrh }
487e2b45d17Sdrh if( iMode2==MODE_DELETE ){
488ac873261Sdrh rc = sqlite3_prepare_v2(db,
489ac873261Sdrh "DELETE FROM wordcount WHERE word=?1",
490ac873261Sdrh -1, &pDelete, 0);
491ac873261Sdrh if( rc ) fatal_error("Could not prepare the DELETE statement: %s\n",
492ac873261Sdrh sqlite3_errmsg(db));
493ac873261Sdrh }
4949ac3c1eaSdrh
4959ac3c1eaSdrh /* Process the input file */
4969ac3c1eaSdrh while( fgets(zInput, sizeof(zInput), in) ){
4979ac3c1eaSdrh for(i=0; zInput[i]; i++){
498c56fac74Sdrh if( !ISALPHA(zInput[i]) ) continue;
499c56fac74Sdrh for(j=i+1; ISALPHA(zInput[j]); j++){}
5009ac3c1eaSdrh
5019ac3c1eaSdrh /* Found a new word at zInput[i] that is j-i bytes long.
5029ac3c1eaSdrh ** Process it into the wordcount table. */
503e2b45d17Sdrh if( iMode2==MODE_DELETE ){
504ac873261Sdrh sqlite3_bind_text(pDelete, 1, zInput+i, j-i, SQLITE_STATIC);
505ac873261Sdrh if( sqlite3_step(pDelete)!=SQLITE_DONE ){
506ac873261Sdrh fatal_error("DELETE failed: %s\n", sqlite3_errmsg(db));
507ac873261Sdrh }
508ac873261Sdrh sqlite3_reset(pDelete);
509e2b45d17Sdrh }else if( iMode2==MODE_SELECT ){
5109ac3c1eaSdrh sqlite3_bind_text(pSelect, 1, zInput+i, j-i, SQLITE_STATIC);
5119ac3c1eaSdrh rc = sqlite3_step(pSelect);
5129ac3c1eaSdrh sqlite3_reset(pSelect);
5139ac3c1eaSdrh if( rc==SQLITE_ROW ){
5149ac3c1eaSdrh sqlite3_bind_text(pUpdate, 1, zInput+i, j-i, SQLITE_STATIC);
5159ac3c1eaSdrh if( sqlite3_step(pUpdate)!=SQLITE_DONE ){
5169ac3c1eaSdrh fatal_error("UPDATE failed: %s\n", sqlite3_errmsg(db));
5179ac3c1eaSdrh }
5189ac3c1eaSdrh sqlite3_reset(pUpdate);
5199ac3c1eaSdrh }else if( rc==SQLITE_DONE ){
5209ac3c1eaSdrh sqlite3_bind_text(pInsert, 1, zInput+i, j-i, SQLITE_STATIC);
5219ac3c1eaSdrh if( sqlite3_step(pInsert)!=SQLITE_DONE ){
5229ac3c1eaSdrh fatal_error("Insert failed: %s\n", sqlite3_errmsg(db));
5239ac3c1eaSdrh }
5249ac3c1eaSdrh sqlite3_reset(pInsert);
5259ac3c1eaSdrh }else{
5269ac3c1eaSdrh fatal_error("SELECT failed: %s\n", sqlite3_errmsg(db));
5279ac3c1eaSdrh }
528e2b45d17Sdrh }else if( iMode2==MODE_QUERY ){
529776f3a2fSdrh sqlite3_bind_text(pSelect, 1, zInput+i, j-i, SQLITE_STATIC);
530776f3a2fSdrh if( sqlite3_step(pSelect)==SQLITE_ROW ){
531776f3a2fSdrh sumCnt += sqlite3_column_int64(pSelect, 0);
532776f3a2fSdrh }
533776f3a2fSdrh sqlite3_reset(pSelect);
5349ac3c1eaSdrh }else{
5359ac3c1eaSdrh sqlite3_bind_text(pInsert, 1, zInput+i, j-i, SQLITE_STATIC);
5369ac3c1eaSdrh if( sqlite3_step(pInsert)!=SQLITE_DONE ){
5379ac3c1eaSdrh fatal_error("INSERT failed: %s\n", sqlite3_errmsg(db));
5389ac3c1eaSdrh }
5399ac3c1eaSdrh sqlite3_reset(pInsert);
540e2b45d17Sdrh if( iMode2==MODE_UPDATE
541e2b45d17Sdrh || (iMode2==MODE_INSERT && sqlite3_changes(db)==0)
5429ac3c1eaSdrh ){
5439ac3c1eaSdrh sqlite3_bind_text(pUpdate, 1, zInput+i, j-i, SQLITE_STATIC);
5449ac3c1eaSdrh if( sqlite3_step(pUpdate)!=SQLITE_DONE ){
5459ac3c1eaSdrh fatal_error("UPDATE failed: %s\n", sqlite3_errmsg(db));
5469ac3c1eaSdrh }
5479ac3c1eaSdrh sqlite3_reset(pUpdate);
5489ac3c1eaSdrh }
5499ac3c1eaSdrh }
5509ac3c1eaSdrh i = j-1;
551ac873261Sdrh
552ac873261Sdrh /* Increment the operation counter. Do a COMMIT if it is time. */
553ac873261Sdrh nOp++;
554ac873261Sdrh if( commitInterval>0 && (nOp%commitInterval)==0 ){
555ac873261Sdrh sqlite3_exec(db, "COMMIT; BEGIN IMMEDIATE", 0, 0, 0);
556ac873261Sdrh }
5579ac3c1eaSdrh }
5589ac3c1eaSdrh }
5599ac3c1eaSdrh sqlite3_exec(db, "COMMIT", 0, 0, 0);
560e2b45d17Sdrh sqlite3_finalize(pInsert); pInsert = 0;
561e2b45d17Sdrh sqlite3_finalize(pUpdate); pUpdate = 0;
562e2b45d17Sdrh sqlite3_finalize(pSelect); pSelect = 0;
563e2b45d17Sdrh sqlite3_finalize(pDelete); pDelete = 0;
5649ac3c1eaSdrh
565e2b45d17Sdrh if( iMode2==MODE_QUERY && iMode!=MODE_ALL ){
56619eb77bbSdrh printf("%s sum of cnt: %lld\n", zTag, sumCnt);
567776f3a2fSdrh rc = sqlite3_prepare_v2(db,"SELECT sum(cnt*cnt) FROM wordcount", -1,
568776f3a2fSdrh &pSelect, 0);
569776f3a2fSdrh if( rc==SQLITE_OK && sqlite3_step(pSelect)==SQLITE_ROW ){
57019eb77bbSdrh printf("%s double-check: %lld\n", zTag,sqlite3_column_int64(pSelect,0));
571776f3a2fSdrh }
572776f3a2fSdrh sqlite3_finalize(pSelect);
573776f3a2fSdrh }
574776f3a2fSdrh
575d2b637c2Sdrh
576d2b637c2Sdrh if( showTimer ){
577d2b637c2Sdrh sqlite3_int64 elapseTime = realTime() - startTime;
578e2b45d17Sdrh totalTime += elapseTime;
57919eb77bbSdrh fprintf(pTimer, "%3d.%03d wordcount", (int)(elapseTime/1000),
5807a909855Sdrh (int)(elapseTime%1000));
581e2b45d17Sdrh if( iMode==MODE_ALL ){
582e2b45d17Sdrh fprintf(pTimer, " %s%s\n", azMode[iMode2],
583e2b45d17Sdrh useWithoutRowid? " --without-rowid" : "");
584e2b45d17Sdrh }else{
58519eb77bbSdrh for(i=1; i<argc; i++) if( i!=showTimer ) fprintf(pTimer," %s",argv[i]);
58619eb77bbSdrh fprintf(pTimer, "\n");
587d2b637c2Sdrh }
588e2b45d17Sdrh }
589d2b637c2Sdrh
5900afa99e3Sdrh if( showSummary ){
59124f1985aSdrh sqlite3_create_function(db, "checksum", -1, SQLITE_UTF8, 0,
59224f1985aSdrh 0, checksumStep, checksumFinalize);
5930afa99e3Sdrh sqlite3_exec(db,
59424f1985aSdrh "SELECT 'count(*): ', count(*) FROM wordcount;\n"
59524f1985aSdrh "SELECT 'sum(cnt): ', sum(cnt) FROM wordcount;\n"
59624f1985aSdrh "SELECT 'max(cnt): ', max(cnt) FROM wordcount;\n"
59724f1985aSdrh "SELECT 'avg(cnt): ', avg(cnt) FROM wordcount;\n"
59824f1985aSdrh "SELECT 'sum(cnt=1):', sum(cnt=1) FROM wordcount;\n"
59924f1985aSdrh "SELECT 'top 10: ', group_concat(word, ', ') FROM "
600aedfc507Sdrh "(SELECT word FROM wordcount ORDER BY cnt DESC, word LIMIT 10);\n"
60124f1985aSdrh "SELECT 'checksum: ', checksum(word, cnt) FROM "
60224f1985aSdrh "(SELECT word, cnt FROM wordcount ORDER BY word);\n"
60324f1985aSdrh "PRAGMA integrity_check;\n",
6040afa99e3Sdrh printResult, 0, 0);
6050afa99e3Sdrh }
606e2b45d17Sdrh } /* End the --all loop */
607e2b45d17Sdrh
608e2b45d17Sdrh /* Close the input file after the last read */
609e2b45d17Sdrh if( zFileToRead ) fclose(in);
610e2b45d17Sdrh
611e2b45d17Sdrh /* In --all mode, so the total time */
612e2b45d17Sdrh if( iMode==MODE_ALL && showTimer ){
613e2b45d17Sdrh fprintf(pTimer, "%3d.%03d wordcount --all\n", (int)(totalTime/1000),
614e2b45d17Sdrh (int)(totalTime%1000));
615e2b45d17Sdrh }
6160afa99e3Sdrh
6179ac3c1eaSdrh /* Database connection statistics printed after both prepared statements
6189ac3c1eaSdrh ** have been finalized */
6190afa99e3Sdrh if( showStats ){
6209ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_USED, &iCur, &iHiwtr, 0);
62119eb77bbSdrh printf("%s Lookaside Slots Used: %d (max %d)\n", zTag, iCur,iHiwtr);
6229ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_HIT, &iCur, &iHiwtr, 0);
62319eb77bbSdrh printf("%s Successful lookasides: %d\n", zTag, iHiwtr);
6249ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE, &iCur,&iHiwtr,0);
62519eb77bbSdrh printf("%s Lookaside size faults: %d\n", zTag, iHiwtr);
6269ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL, &iCur,&iHiwtr,0);
62719eb77bbSdrh printf("%s Lookaside OOM faults: %d\n", zTag, iHiwtr);
6289ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_USED, &iCur, &iHiwtr, 0);
62919eb77bbSdrh printf("%s Pager Heap Usage: %d bytes\n", zTag, iCur);
6309ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_HIT, &iCur, &iHiwtr, 1);
63119eb77bbSdrh printf("%s Page cache hits: %d\n", zTag, iCur);
6329ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_MISS, &iCur, &iHiwtr, 1);
63319eb77bbSdrh printf("%s Page cache misses: %d\n", zTag, iCur);
6349ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_CACHE_WRITE, &iCur, &iHiwtr, 1);
63519eb77bbSdrh printf("%s Page cache writes: %d\n", zTag, iCur);
6369ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_SCHEMA_USED, &iCur, &iHiwtr, 0);
63719eb77bbSdrh printf("%s Schema Heap Usage: %d bytes\n", zTag, iCur);
6389ac3c1eaSdrh sqlite3_db_status(db, SQLITE_DBSTATUS_STMT_USED, &iCur, &iHiwtr, 0);
63919eb77bbSdrh printf("%s Statement Heap Usage: %d bytes\n", zTag, iCur);
6400afa99e3Sdrh }
6419ac3c1eaSdrh
6429ac3c1eaSdrh sqlite3_close(db);
6439ac3c1eaSdrh
6449ac3c1eaSdrh /* Global memory usage statistics printed after the database connection
6459ac3c1eaSdrh ** has closed. Memory usage should be zero at this point. */
6460afa99e3Sdrh if( showStats ){
6479ac3c1eaSdrh sqlite3_status(SQLITE_STATUS_MEMORY_USED, &iCur, &iHiwtr, 0);
64819eb77bbSdrh printf("%s Memory Used (bytes): %d (max %d)\n", zTag,iCur,iHiwtr);
6499ac3c1eaSdrh sqlite3_status(SQLITE_STATUS_MALLOC_COUNT, &iCur, &iHiwtr, 0);
65019eb77bbSdrh printf("%s Outstanding Allocations: %d (max %d)\n",zTag,iCur,iHiwtr);
6519ac3c1eaSdrh sqlite3_status(SQLITE_STATUS_PAGECACHE_OVERFLOW, &iCur, &iHiwtr, 0);
65219eb77bbSdrh printf("%s Pcache Overflow Bytes: %d (max %d)\n",zTag,iCur,iHiwtr);
6539ac3c1eaSdrh sqlite3_status(SQLITE_STATUS_MALLOC_SIZE, &iCur, &iHiwtr, 0);
65419eb77bbSdrh printf("%s Largest Allocation: %d bytes\n",zTag,iHiwtr);
6559ac3c1eaSdrh sqlite3_status(SQLITE_STATUS_PAGECACHE_SIZE, &iCur, &iHiwtr, 0);
65619eb77bbSdrh printf("%s Largest Pcache Allocation: %d bytes\n",zTag,iHiwtr);
6570afa99e3Sdrh }
6589ac3c1eaSdrh return 0;
6599ac3c1eaSdrh }
660