xref: /sqlite-3.40.0/tool/loadfts.c (revision fcd43253)
1 /*
2 ** 2014-07-28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This file implements a utility program that will load many disk
14 ** files (all files under a given directory) into a FTS table.  This is
15 ** used for performance testing of FTS3, FTS4, and FTS5.
16 */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <ctype.h>
21 #include <assert.h>
22 #include <string.h>
23 #include <errno.h>
24 #include <dirent.h>
25 #include "sqlite3.h"
26 
27 /*
28 ** Implementation of the "readtext(X)" SQL function.  The entire content
29 ** of the file named X is read and returned as a TEXT value. It is assumed
30 ** the file contains UTF-8 text. NULL is returned if the file does not
31 ** exist or is unreadable.
32 */
readfileFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)33 static void readfileFunc(
34   sqlite3_context *context,
35   int argc,
36   sqlite3_value **argv
37 ){
38   const char *zName;
39   FILE *in;
40   long nIn;
41   void *pBuf;
42 
43   zName = (const char*)sqlite3_value_text(argv[0]);
44   if( zName==0 ) return;
45   in = fopen(zName, "rb");
46   if( in==0 ) return;
47   fseek(in, 0, SEEK_END);
48   nIn = ftell(in);
49   rewind(in);
50   pBuf = sqlite3_malloc( nIn );
51   if( pBuf && 1==fread(pBuf, nIn, 1, in) ){
52     sqlite3_result_text(context, pBuf, nIn, sqlite3_free);
53   }else{
54     sqlite3_free(pBuf);
55   }
56   fclose(in);
57 }
58 
/*
** Print usage text for this program to stdout and exit with status 1.
** zArgv0 is the program name to show in the usage line.
*/
static void showHelp(const char *zArgv0){
  printf("\n"
"Usage: %s SWITCHES... DB\n"
"\n"
"  This program opens the database named on the command line and attempts to\n"
"  create an FTS table named \"fts\" with a single column. If successful, it\n"
"  recursively traverses the directory named by the -dir option and inserts\n"
"  the contents of each file into the fts table. All files are assumed to\n"
"  contain UTF-8 text.\n"
"\n"
"Switches are:\n"
"  -fts [345]       FTS version to use (default=5)\n"
"  -idx [01]        Create a mapping from filename to rowid (default=0)\n"
"  -dir <path>      Root of directory tree to load data from (default=.)\n"
"  -trans <integer> Number of inserts per transaction (default=1)\n"
"  -special <cmd>   Run \"INSERT INTO fts(fts) VALUES(<cmd>)\" after creating the\n"
"                   table (may be given more than once)\n"
, zArgv0
);
  exit(1);
}
81 
/*
** Report a failed system call and terminate. The message written to
** stderr is zText followed by the strerror() text for the current errno.
** The process exits with status -1; this function does not return.
*/
static void error_out(const char *zText){
  const char *zReason = strerror(errno);
  fprintf(stderr, "%s: %s\n", zText, zReason);
  exit(-1);
}
89 
90 /*
91 ** Exit with a message based on the first argument and the error message
92 ** currently stored in database handle db.
93 */
sqlite_error_out(const char * zText,sqlite3 * db)94 static void sqlite_error_out(const char *zText, sqlite3 *db){
95   fprintf(stderr, "%s: %s\n", zText, sqlite3_errmsg(db));
96   exit(-1);
97 }
98 
99 /*
100 ** Context object for visit_file().
101 */
102 typedef struct VisitContext VisitContext;
103 struct VisitContext {
104   int nRowPerTrans;
105   sqlite3 *db;                    /* Database handle */
106   sqlite3_stmt *pInsert;          /* INSERT INTO fts VALUES(readtext(:1)) */
107 };
108 
109 /*
110 ** Callback used with traverse(). The first argument points to an object
111 ** of type VisitContext. This function inserts the contents of the text
112 ** file zPath into the FTS table.
113 */
visit_file(void * pCtx,const char * zPath)114 void visit_file(void *pCtx, const char *zPath){
115   int rc;
116   VisitContext *p = (VisitContext*)pCtx;
117   /* printf("%s\n", zPath); */
118   sqlite3_bind_text(p->pInsert, 1, zPath, -1, SQLITE_STATIC);
119   sqlite3_step(p->pInsert);
120   rc = sqlite3_reset(p->pInsert);
121   if( rc!=SQLITE_OK ){
122     sqlite_error_out("insert", p->db);
123   }else if( p->nRowPerTrans>0
124          && (sqlite3_last_insert_rowid(p->db) % p->nRowPerTrans)==0
125   ){
126     sqlite3_exec(p->db, "COMMIT ; BEGIN", 0, 0, 0);
127   }
128 }
129 
130 /*
131 ** Recursively traverse directory zDir. For each file that is not a
132 ** directory, invoke the supplied callback with its path.
133 */
traverse(const char * zDir,void * pCtx,void (* xCallback)(void *,const char * zPath))134 static void traverse(
135   const char *zDir,               /* Directory to traverse */
136   void *pCtx,                     /* First argument passed to callback */
137   void (*xCallback)(void*, const char *zPath)
138 ){
139   DIR *d;
140   struct dirent *e;
141 
142   d = opendir(zDir);
143   if( d==0 ) error_out("opendir()");
144 
145   for(e=readdir(d); e; e=readdir(d)){
146     if( strcmp(e->d_name, ".")==0 || strcmp(e->d_name, "..")==0 ) continue;
147     char *zPath = sqlite3_mprintf("%s/%s", zDir, e->d_name);
148     if (e->d_type & DT_DIR) {
149       traverse(zPath, pCtx, xCallback);
150     }else{
151       xCallback(pCtx, zPath);
152     }
153     sqlite3_free(zPath);
154   }
155 
156   closedir(d);
157 }
158 
main(int argc,char ** argv)159 int main(int argc, char **argv){
160   int iFts = 5;                   /* Value of -fts option */
161   int bMap = 0;                   /* True to create mapping table */
162   const char *zDir = ".";         /* Directory to scan */
163   int i;
164   int rc;
165   int nRowPerTrans = 0;
166   sqlite3 *db;
167   char *zSql;
168   VisitContext sCtx;
169 
170   int nCmd = 0;
171   char **aCmd = 0;
172 
173   if( argc % 2 ) showHelp(argv[0]);
174 
175   for(i=1; i<(argc-1); i+=2){
176     char *zOpt = argv[i];
177     char *zArg = argv[i+1];
178     if( strcmp(zOpt, "-fts")==0 ){
179       iFts = atoi(zArg);
180       if( iFts!=3 && iFts!=4 && iFts!= 5) showHelp(argv[0]);
181     }
182     else if( strcmp(zOpt, "-trans")==0 ){
183       nRowPerTrans = atoi(zArg);
184     }
185     else if( strcmp(zOpt, "-idx")==0 ){
186       bMap = atoi(zArg);
187       if( bMap!=0 && bMap!=1 ) showHelp(argv[0]);
188     }
189     else if( strcmp(zOpt, "-dir")==0 ){
190       zDir = zArg;
191     }
192     else if( strcmp(zOpt, "-special")==0 ){
193       nCmd++;
194       aCmd = sqlite3_realloc(aCmd, sizeof(char*) * nCmd);
195       aCmd[nCmd-1] = zArg;
196     }
197     else{
198       showHelp(argv[0]);
199     }
200   }
201 
202   /* Open the database file */
203   rc = sqlite3_open(argv[argc-1], &db);
204   if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_open()", db);
205 
206   rc = sqlite3_create_function(db, "readtext", 1, SQLITE_UTF8, 0,
207                                readfileFunc, 0, 0);
208   if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_create_function()", db);
209 
210   /* Create the FTS table */
211   zSql = sqlite3_mprintf("CREATE VIRTUAL TABLE fts USING fts%d(content)", iFts);
212   rc = sqlite3_exec(db, zSql, 0, 0, 0);
213   if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
214   sqlite3_free(zSql);
215 
216   for(i=0; i<nCmd; i++){
217     zSql = sqlite3_mprintf("INSERT INTO fts(fts) VALUES(%Q)", aCmd[i]);
218     rc = sqlite3_exec(db, zSql, 0, 0, 0);
219     if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_exec(1)", db);
220     sqlite3_free(zSql);
221   }
222 
223   /* Compile the INSERT statement to write data to the FTS table. */
224   memset(&sCtx, 0, sizeof(VisitContext));
225   sCtx.db = db;
226   sCtx.nRowPerTrans = nRowPerTrans;
227   rc = sqlite3_prepare_v2(db,
228       "INSERT INTO fts VALUES(readtext(?))", -1, &sCtx.pInsert, 0
229   );
230   if( rc!=SQLITE_OK ) sqlite_error_out("sqlite3_prepare_v2(1)", db);
231 
232   /* Load all files in the directory hierarchy into the FTS table. */
233   if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "BEGIN", 0, 0, 0);
234   traverse(zDir, (void*)&sCtx, visit_file);
235   if( sCtx.nRowPerTrans>0 ) sqlite3_exec(db, "COMMIT", 0, 0, 0);
236 
237   /* Clean up and exit. */
238   sqlite3_finalize(sCtx.pInsert);
239   sqlite3_close(db);
240   sqlite3_free(aCmd);
241   return 0;
242 }
243