xref: /sqlite-3.40.0/ext/fts5/fts5_vocab.c (revision 913306a5)
1 /*
2 ** 2015 May 08
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This is an SQLite virtual table module implementing direct access to an
14 ** existing FTS5 index. The module may create several different types of
15 ** tables:
16 **
17 ** col:
18 **     CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
19 **
20 **   One row for each term/column combination. The value of $doc is set to
21 **   the number of fts5 rows that contain at least one instance of term
22 **   $term within column $col. Field $cnt is set to the total number of
23 **   instances of term $term in column $col (in any row of the fts5 table).
24 **
25 ** row:
26 **     CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
27 **
28 **   One row for each term in the database. The value of $doc is set to
29 **   the number of fts5 rows that contain at least one instance of term
30 **   $term. Field $cnt is set to the total number of instances of term
31 **   $term in the database.
32 **
33 ** instance:
34 **     CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>));
35 **
36 **   One row for each term instance in the database.
37 */
38 
39 
40 #include "fts5Int.h"
41 
42 
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

/*
** An fts5vocab virtual table object. The sqlite3_vtab base object must
** stay the first member, because the module methods cast directly between
** sqlite3_vtab* and Fts5VocabTable*.
**
** zFts5Tbl and zFts5Db point into extra space allocated immediately after
** the structure itself (see fts5VocabInitVtab()), so a single
** sqlite3_free() of the object releases both strings as well.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;
  char *zFts5Tbl;                 /* Dequoted name of the fts5 table */
  char *zFts5Db;                  /* Dequoted name of db containing zFts5Tbl */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL, ROW or INSTANCE */
  unsigned bBusy;                 /* Set while xOpen steps its lookup query;
                                  ** a nested xOpen then reports recursion */
};
55 
/*
** A cursor open on an fts5vocab table. The aCnt[] and aDoc[] arrays are
** not separate allocations: xOpen allocates room for them directly after
** the cursor structure, one entry per column of the fts5 table in each.
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Table *pFts5;               /* Associated FTS5 table */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */
  void *pStruct;                  /* From sqlite3Fts5StructureRef() */

  int nLeTerm;                    /* Size of zLeTerm in bytes, -1 if unset */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */

  /* iCol is used by 'col' tables only. aCnt[] and aDoc[] are used by both
  ** 'col' tables (one entry per column) and 'row' tables (entry 0 only). */
  int iCol;                       /* Column the current 'col' row describes */
  i64 *aCnt;                      /* Instance counts for the current term */
  i64 *aDoc;                      /* Document counts for the current term */

  /* Output values used by all tables. */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */

  /* Output values used by 'instance' tables only */
  i64 iInstPos;                   /* Position last read from the poslist */
  int iInstOff;                   /* Current offset within the poslist */
};
81 
/*
** Table types, stored in Fts5VocabTable.eType. The values double as
** indexes into the azSchema[] array in fts5VocabInitVtab().
*/
#define FTS5_VOCAB_COL      0
#define FTS5_VOCAB_ROW      1
#define FTS5_VOCAB_INSTANCE 2

/*
** Column lists of the three table types. These are spliced into the
** CREATE TABLE statements passed to sqlite3_declare_vtab().
*/
#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"

/*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter. Each
** bit that is set corresponds to one entry of the apVal[] array passed to
** xFilter, which consumes them in the order EQ, GE, LE.
*/
#define FTS5_VOCAB_TERM_EQ 0x01
#define FTS5_VOCAB_TERM_GE 0x02
#define FTS5_VOCAB_TERM_LE 0x04
97 
98 /*
99 ** Translate a string containing an fts5vocab table type to an
100 ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
101 ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
102 ** and return SQLITE_ERROR.
103 */
fts5VocabTableType(const char * zType,char ** pzErr,int * peType)104 static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
105   int rc = SQLITE_OK;
106   char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
107   if( rc==SQLITE_OK ){
108     sqlite3Fts5Dequote(zCopy);
109     if( sqlite3_stricmp(zCopy, "col")==0 ){
110       *peType = FTS5_VOCAB_COL;
111     }else
112 
113     if( sqlite3_stricmp(zCopy, "row")==0 ){
114       *peType = FTS5_VOCAB_ROW;
115     }else
116     if( sqlite3_stricmp(zCopy, "instance")==0 ){
117       *peType = FTS5_VOCAB_INSTANCE;
118     }else
119     {
120       *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
121       rc = SQLITE_ERROR;
122     }
123     sqlite3_free(zCopy);
124   }
125 
126   return rc;
127 }
128 
129 
130 /*
131 ** The xDisconnect() virtual table method.
132 */
fts5VocabDisconnectMethod(sqlite3_vtab * pVtab)133 static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
134   Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
135   sqlite3_free(pTab);
136   return SQLITE_OK;
137 }
138 
139 /*
140 ** The xDestroy() virtual table method.
141 */
fts5VocabDestroyMethod(sqlite3_vtab * pVtab)142 static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
143   Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
144   sqlite3_free(pTab);
145   return SQLITE_OK;
146 }
147 
148 /*
149 ** This function is the implementation of both the xConnect and xCreate
150 ** methods of the FTS3 virtual table.
151 **
152 ** The argv[] array contains the following:
153 **
154 **   argv[0]   -> module name  ("fts5vocab")
155 **   argv[1]   -> database name
156 **   argv[2]   -> table name
157 **
158 ** then:
159 **
160 **   argv[3]   -> name of fts5 table
161 **   argv[4]   -> type of fts5vocab table
162 **
163 ** or, for tables in the TEMP schema only.
164 **
165 **   argv[3]   -> name of fts5 tables database
166 **   argv[4]   -> name of fts5 table
167 **   argv[5]   -> type of fts5vocab table
168 */
fts5VocabInitVtab(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVTab,char ** pzErr)169 static int fts5VocabInitVtab(
170   sqlite3 *db,                    /* The SQLite database connection */
171   void *pAux,                     /* Pointer to Fts5Global object */
172   int argc,                       /* Number of elements in argv array */
173   const char * const *argv,       /* xCreate/xConnect argument array */
174   sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
175   char **pzErr                    /* Write any error message here */
176 ){
177   const char *azSchema[] = {
178     "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA  ")",
179     "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA  ")",
180     "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")"
181   };
182 
183   Fts5VocabTable *pRet = 0;
184   int rc = SQLITE_OK;             /* Return code */
185   int bDb;
186 
187   bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
188 
189   if( argc!=5 && bDb==0 ){
190     *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
191     rc = SQLITE_ERROR;
192   }else{
193     int nByte;                      /* Bytes of space to allocate */
194     const char *zDb = bDb ? argv[3] : argv[1];
195     const char *zTab = bDb ? argv[4] : argv[3];
196     const char *zType = bDb ? argv[5] : argv[4];
197     int nDb = (int)strlen(zDb)+1;
198     int nTab = (int)strlen(zTab)+1;
199     int eType = 0;
200 
201     rc = fts5VocabTableType(zType, pzErr, &eType);
202     if( rc==SQLITE_OK ){
203       assert( eType>=0 && eType<ArraySize(azSchema) );
204       rc = sqlite3_declare_vtab(db, azSchema[eType]);
205     }
206 
207     nByte = sizeof(Fts5VocabTable) + nDb + nTab;
208     pRet = sqlite3Fts5MallocZero(&rc, nByte);
209     if( pRet ){
210       pRet->pGlobal = (Fts5Global*)pAux;
211       pRet->eType = eType;
212       pRet->db = db;
213       pRet->zFts5Tbl = (char*)&pRet[1];
214       pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
215       memcpy(pRet->zFts5Tbl, zTab, nTab);
216       memcpy(pRet->zFts5Db, zDb, nDb);
217       sqlite3Fts5Dequote(pRet->zFts5Tbl);
218       sqlite3Fts5Dequote(pRet->zFts5Db);
219     }
220   }
221 
222   *ppVTab = (sqlite3_vtab*)pRet;
223   return rc;
224 }
225 
226 
227 /*
228 ** The xConnect() and xCreate() methods for the virtual table. All the
229 ** work is done in function fts5VocabInitVtab().
230 */
fts5VocabConnectMethod(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)231 static int fts5VocabConnectMethod(
232   sqlite3 *db,                    /* Database connection */
233   void *pAux,                     /* Pointer to tokenizer hash table */
234   int argc,                       /* Number of elements in argv array */
235   const char * const *argv,       /* xCreate/xConnect argument array */
236   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
237   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
238 ){
239   return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
240 }
/*
** The xCreate() method for the virtual table. As with xConnect(), all of
** the work is delegated to fts5VocabInitVtab().
*/
static int fts5VocabCreateMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Pointer to Fts5Global object */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  int rc = fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
  return rc;
}
251 
252 /*
253 ** Implementation of the xBestIndex method.
254 **
255 ** Only constraints of the form:
256 **
257 **     term <= ?
258 **     term == ?
259 **     term >= ?
260 **
261 ** are interpreted. Less-than and less-than-or-equal are treated
262 ** identically, as are greater-than and greater-than-or-equal.
263 */
fts5VocabBestIndexMethod(sqlite3_vtab * pUnused,sqlite3_index_info * pInfo)264 static int fts5VocabBestIndexMethod(
265   sqlite3_vtab *pUnused,
266   sqlite3_index_info *pInfo
267 ){
268   int i;
269   int iTermEq = -1;
270   int iTermGe = -1;
271   int iTermLe = -1;
272   int idxNum = 0;
273   int nArg = 0;
274 
275   UNUSED_PARAM(pUnused);
276 
277   for(i=0; i<pInfo->nConstraint; i++){
278     struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
279     if( p->usable==0 ) continue;
280     if( p->iColumn==0 ){          /* term column */
281       if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i;
282       if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i;
283       if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i;
284       if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i;
285       if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i;
286     }
287   }
288 
289   if( iTermEq>=0 ){
290     idxNum |= FTS5_VOCAB_TERM_EQ;
291     pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg;
292     pInfo->estimatedCost = 100;
293   }else{
294     pInfo->estimatedCost = 1000000;
295     if( iTermGe>=0 ){
296       idxNum |= FTS5_VOCAB_TERM_GE;
297       pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg;
298       pInfo->estimatedCost = pInfo->estimatedCost / 2;
299     }
300     if( iTermLe>=0 ){
301       idxNum |= FTS5_VOCAB_TERM_LE;
302       pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg;
303       pInfo->estimatedCost = pInfo->estimatedCost / 2;
304     }
305   }
306 
307   /* This virtual table always delivers results in ascending order of
308   ** the "term" column (column 0). So if the user has requested this
309   ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the
310   ** sqlite3_index_info.orderByConsumed flag to tell the core the results
311   ** are already in sorted order.  */
312   if( pInfo->nOrderBy==1
313    && pInfo->aOrderBy[0].iColumn==0
314    && pInfo->aOrderBy[0].desc==0
315   ){
316     pInfo->orderByConsumed = 1;
317   }
318 
319   pInfo->idxNum = idxNum;
320   return SQLITE_OK;
321 }
322 
323 /*
324 ** Implementation of xOpen method.
325 */
fts5VocabOpenMethod(sqlite3_vtab * pVTab,sqlite3_vtab_cursor ** ppCsr)326 static int fts5VocabOpenMethod(
327   sqlite3_vtab *pVTab,
328   sqlite3_vtab_cursor **ppCsr
329 ){
330   Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
331   Fts5Table *pFts5 = 0;
332   Fts5VocabCursor *pCsr = 0;
333   int rc = SQLITE_OK;
334   sqlite3_stmt *pStmt = 0;
335   char *zSql = 0;
336 
337   if( pTab->bBusy ){
338     pVTab->zErrMsg = sqlite3_mprintf(
339        "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
340     );
341     return SQLITE_ERROR;
342   }
343   zSql = sqlite3Fts5Mprintf(&rc,
344       "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
345       pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
346   );
347   if( zSql ){
348     rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
349   }
350   sqlite3_free(zSql);
351   assert( rc==SQLITE_OK || pStmt==0 );
352   if( rc==SQLITE_ERROR ) rc = SQLITE_OK;
353 
354   pTab->bBusy = 1;
355   if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
356     i64 iId = sqlite3_column_int64(pStmt, 0);
357     pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId);
358   }
359   pTab->bBusy = 0;
360 
361   if( rc==SQLITE_OK ){
362     if( pFts5==0 ){
363       rc = sqlite3_finalize(pStmt);
364       pStmt = 0;
365       if( rc==SQLITE_OK ){
366         pVTab->zErrMsg = sqlite3_mprintf(
367             "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
368             );
369         rc = SQLITE_ERROR;
370       }
371     }else{
372       rc = sqlite3Fts5FlushToDisk(pFts5);
373     }
374   }
375 
376   if( rc==SQLITE_OK ){
377     i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor);
378     pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
379   }
380 
381   if( pCsr ){
382     pCsr->pFts5 = pFts5;
383     pCsr->pStmt = pStmt;
384     pCsr->aCnt = (i64*)&pCsr[1];
385     pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol];
386   }else{
387     sqlite3_finalize(pStmt);
388   }
389 
390   *ppCsr = (sqlite3_vtab_cursor*)pCsr;
391   return rc;
392 }
393 
/*
** Return cursor pCsr to the state it was in before xFilter was first
** called: the iterator is closed, the structure reference released, any
** upper-bound term freed, and the rowid and EOF flag cleared. Setting
** nLeTerm to -1 marks the cursor as having no upper bound.
**
** The term buffer, the aCnt[]/aDoc[] arrays and pStmt are not touched.
*/
static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
  sqlite3Fts5IterClose(pCsr->pIter);
  pCsr->pIter = 0;
  sqlite3Fts5StructureRelease(pCsr->pStruct);
  pCsr->pStruct = 0;

  sqlite3_free(pCsr->zLeTerm);
  pCsr->zLeTerm = 0;
  pCsr->nLeTerm = -1;

  pCsr->rowid = 0;
  pCsr->bEof = 0;
}
405 
406 /*
407 ** Close the cursor.  For additional information see the documentation
408 ** on the xClose method of the virtual table interface.
409 */
fts5VocabCloseMethod(sqlite3_vtab_cursor * pCursor)410 static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
411   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
412   fts5VocabResetCursor(pCsr);
413   sqlite3Fts5BufferFree(&pCsr->term);
414   sqlite3_finalize(pCsr->pStmt);
415   sqlite3_free(pCsr);
416   return SQLITE_OK;
417 }
418 
/*
** Used by 'instance' tables. Load the term that the iterator currently
** points at into pCsr->term.
**
** If the iterator is at EOF, set pCsr->bEof and leave the term alone. If
** an upper bound (zLeTerm) is set and the iterator's term sorts after it,
** also set pCsr->bEof - although in that case the term is still copied
** into pCsr->term.
**
** Returns SQLITE_OK, or the error code set by sqlite3Fts5BufferSet().
*/
static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){
  int rc = SQLITE_OK;
  const char *zNew;               /* Term the iterator points at */
  int nNew;                       /* Size of zNew in bytes */

  if( sqlite3Fts5IterEof(pCsr->pIter) ){
    pCsr->bEof = 1;
    return rc;
  }

  zNew = sqlite3Fts5IterTerm(pCsr->pIter, &nNew);
  if( pCsr->nLeTerm>=0 ){
    /* memcmp() order over the common prefix; on a tie the longer term
    ** is the larger one. */
    int nLe = pCsr->nLeTerm;
    int nCmp = (nNew<nLe) ? nNew : nLe;
    int res = memcmp(pCsr->zLeTerm, zNew, nCmp);
    if( res<0 || (res==0 && nLe<nNew) ){
      pCsr->bEof = 1;
    }
  }

  sqlite3Fts5BufferSet(&rc, &pCsr->term, nNew, (const u8*)zNew);
  return rc;
}
440 
/*
** Advance an 'instance' table cursor to the next term instance.
**
** Unless the fts5 table uses detail=none, the next position is read from
** the current position list into iInstPos/iInstOff. If one is available
** (sqlite3Fts5PoslistNext64() returns 0) the cursor is done. Otherwise the
** position state is cleared, the iterator stepped to its next entry, the
** term reloaded and the process repeated.
**
** For detail=none tables there are no positions to read, so each call
** steps the iterator exactly once.
**
** The cursor is set to EOF if the iterator is exhausted, the upper bound
** on the term is passed, or (for tables other than detail=none) an error
** occurs.
*/
static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){
  const int bNoPoslist = (pCsr->pFts5->pConfig->eDetail==FTS5_DETAIL_NONE);
  Fts5IndexIter *pIter = pCsr->pIter;
  int rc = SQLITE_OK;

  assert( sqlite3Fts5IterEof(pIter)==0 );
  assert( pCsr->bEof==0 );
  for(;;){
    if( !bNoPoslist
     && 0==sqlite3Fts5PoslistNext64(
            pIter->pData, pIter->nData, &pCsr->iInstOff, &pCsr->iInstPos)
    ){
      /* Found another instance within the current position list. */
      break;
    }

    /* Current entry is used up. Move on to the next one. */
    pCsr->iInstPos = 0;
    pCsr->iInstOff = 0;

    rc = sqlite3Fts5IterNextScan(pCsr->pIter);
    if( rc==SQLITE_OK ){
      rc = fts5VocabInstanceNewTerm(pCsr);
      if( pCsr->bEof || bNoPoslist ) break;
    }
    if( rc ){
      pCsr->bEof = 1;
      break;
    }
  }

  return rc;
}
469 
470 /*
471 ** Advance the cursor to the next row in the table.
472 */
fts5VocabNextMethod(sqlite3_vtab_cursor * pCursor)473 static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
474   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
475   Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
476   int nCol = pCsr->pFts5->pConfig->nCol;
477   int rc;
478 
479   rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct);
480   if( rc!=SQLITE_OK ) return rc;
481   pCsr->rowid++;
482 
483   if( pTab->eType==FTS5_VOCAB_INSTANCE ){
484     return fts5VocabInstanceNext(pCsr);
485   }
486 
487   if( pTab->eType==FTS5_VOCAB_COL ){
488     for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
489       if( pCsr->aDoc[pCsr->iCol] ) break;
490     }
491   }
492 
493   if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){
494     if( sqlite3Fts5IterEof(pCsr->pIter) ){
495       pCsr->bEof = 1;
496     }else{
497       const char *zTerm;
498       int nTerm;
499 
500       zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
501       assert( nTerm>=0 );
502       if( pCsr->nLeTerm>=0 ){
503         int nCmp = MIN(nTerm, pCsr->nLeTerm);
504         int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
505         if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
506           pCsr->bEof = 1;
507           return SQLITE_OK;
508         }
509       }
510 
511       sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
512       memset(pCsr->aCnt, 0, nCol * sizeof(i64));
513       memset(pCsr->aDoc, 0, nCol * sizeof(i64));
514       pCsr->iCol = 0;
515 
516       assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
517       while( rc==SQLITE_OK ){
518         int eDetail = pCsr->pFts5->pConfig->eDetail;
519         const u8 *pPos; int nPos;   /* Position list */
520         i64 iPos = 0;               /* 64-bit position read from poslist */
521         int iOff = 0;               /* Current offset within position list */
522 
523         pPos = pCsr->pIter->pData;
524         nPos = pCsr->pIter->nData;
525 
526         switch( pTab->eType ){
527           case FTS5_VOCAB_ROW:
528             if( eDetail==FTS5_DETAIL_FULL ){
529               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
530                 pCsr->aCnt[0]++;
531               }
532             }
533             pCsr->aDoc[0]++;
534             break;
535 
536           case FTS5_VOCAB_COL:
537             if( eDetail==FTS5_DETAIL_FULL ){
538               int iCol = -1;
539               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
540                 int ii = FTS5_POS2COLUMN(iPos);
541                 if( iCol!=ii ){
542                   if( ii>=nCol ){
543                     rc = FTS5_CORRUPT;
544                     break;
545                   }
546                   pCsr->aDoc[ii]++;
547                   iCol = ii;
548                 }
549                 pCsr->aCnt[ii]++;
550               }
551             }else if( eDetail==FTS5_DETAIL_COLUMNS ){
552               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
553                 assert_nc( iPos>=0 && iPos<nCol );
554                 if( iPos>=nCol ){
555                   rc = FTS5_CORRUPT;
556                   break;
557                 }
558                 pCsr->aDoc[iPos]++;
559               }
560             }else{
561               assert( eDetail==FTS5_DETAIL_NONE );
562               pCsr->aDoc[0]++;
563             }
564             break;
565 
566           default:
567             assert( pTab->eType==FTS5_VOCAB_INSTANCE );
568             break;
569         }
570 
571         if( rc==SQLITE_OK ){
572           rc = sqlite3Fts5IterNextScan(pCsr->pIter);
573         }
574         if( pTab->eType==FTS5_VOCAB_INSTANCE ) break;
575 
576         if( rc==SQLITE_OK ){
577           zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
578           if( nTerm!=pCsr->term.n
579           || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm))
580           ){
581             break;
582           }
583           if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
584         }
585       }
586     }
587   }
588 
589   if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
590     for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++);
591     if( pCsr->iCol==nCol ){
592       rc = FTS5_CORRUPT;
593     }
594   }
595   return rc;
596 }
597 
598 /*
599 ** This is the xFilter implementation for the virtual table.
600 */
fts5VocabFilterMethod(sqlite3_vtab_cursor * pCursor,int idxNum,const char * zUnused,int nUnused,sqlite3_value ** apVal)601 static int fts5VocabFilterMethod(
602   sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
603   int idxNum,                     /* Strategy index */
604   const char *zUnused,            /* Unused */
605   int nUnused,                    /* Number of elements in apVal */
606   sqlite3_value **apVal           /* Arguments for the indexing scheme */
607 ){
608   Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
609   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
610   int eType = pTab->eType;
611   int rc = SQLITE_OK;
612 
613   int iVal = 0;
614   int f = FTS5INDEX_QUERY_SCAN;
615   const char *zTerm = 0;
616   int nTerm = 0;
617 
618   sqlite3_value *pEq = 0;
619   sqlite3_value *pGe = 0;
620   sqlite3_value *pLe = 0;
621 
622   UNUSED_PARAM2(zUnused, nUnused);
623 
624   fts5VocabResetCursor(pCsr);
625   if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
626   if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
627   if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
628 
629   if( pEq ){
630     zTerm = (const char *)sqlite3_value_text(pEq);
631     nTerm = sqlite3_value_bytes(pEq);
632     f = 0;
633   }else{
634     if( pGe ){
635       zTerm = (const char *)sqlite3_value_text(pGe);
636       nTerm = sqlite3_value_bytes(pGe);
637     }
638     if( pLe ){
639       const char *zCopy = (const char *)sqlite3_value_text(pLe);
640       if( zCopy==0 ) zCopy = "";
641       pCsr->nLeTerm = sqlite3_value_bytes(pLe);
642       pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1);
643       if( pCsr->zLeTerm==0 ){
644         rc = SQLITE_NOMEM;
645       }else{
646         memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1);
647       }
648     }
649   }
650 
651   if( rc==SQLITE_OK ){
652     Fts5Index *pIndex = pCsr->pFts5->pIndex;
653     rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
654     if( rc==SQLITE_OK ){
655       pCsr->pStruct = sqlite3Fts5StructureRef(pIndex);
656     }
657   }
658   if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){
659     rc = fts5VocabInstanceNewTerm(pCsr);
660   }
661   if( rc==SQLITE_OK && !pCsr->bEof
662    && (eType!=FTS5_VOCAB_INSTANCE
663     || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE)
664   ){
665     rc = fts5VocabNextMethod(pCursor);
666   }
667 
668   return rc;
669 }
670 
671 /*
672 ** This is the xEof method of the virtual table. SQLite calls this
673 ** routine to find out if it has reached the end of a result set.
674 */
fts5VocabEofMethod(sqlite3_vtab_cursor * pCursor)675 static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
676   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
677   return pCsr->bEof;
678 }
679 
/*
** This is the xColumn method. It supplies the value of column iCol for
** the row the cursor currently points at.
**
** Column 0 is always the term. The remaining columns depend on the table
** type:
**
**   col:       1 -> column name, 2 -> doc, 3 -> cnt
**   row:       1 -> doc,         2 -> cnt
**   instance:  1 -> rowid of the document, 2 -> column name, 3 -> offset
**
** No result is set at all in several cases: a doc or cnt value that is
** not greater than zero, the column name of a 'col' table when the fts5
** table uses detail=none, the column name of an 'instance' table when the
** column number is unavailable or out of range, and the offset when the
** detail level is anything other than "full". (Per the virtual table
** documentation, a column for which no result is set reads as NULL.)
*/
static int fts5VocabColumnMethod(
  sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
  sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
  int iCol                        /* Index of column to read value from */
){
  Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
  Fts5Config *pConfig = pCsr->pFts5->pConfig;
  const int eDetail = pConfig->eDetail;
  const int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType;
  i64 nVal = 0;                   /* doc or cnt value to return, if >0 */

  if( iCol==0 ){
    sqlite3_result_text(
        pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
    );
    return SQLITE_OK;
  }

  switch( eType ){
    case FTS5_VOCAB_COL:
      assert( iCol==1 || iCol==2 || iCol==3 );
      if( iCol==1 ){
        if( eDetail!=FTS5_DETAIL_NONE ){
          const char *zName = pConfig->azCol[pCsr->iCol];
          sqlite3_result_text(pCtx, zName, -1, SQLITE_STATIC);
        }
      }else if( iCol==2 ){
        nVal = pCsr->aDoc[pCsr->iCol];
      }else{
        nVal = pCsr->aCnt[pCsr->iCol];
      }
      break;

    case FTS5_VOCAB_ROW:
      assert( iCol==1 || iCol==2 );
      nVal = (iCol==1) ? pCsr->aDoc[0] : pCsr->aCnt[0];
      break;

    default:
      assert( eType==FTS5_VOCAB_INSTANCE );
      if( iCol==1 ){
        sqlite3_result_int64(pCtx, pCsr->pIter->iRowid);
      }else if( iCol==2 ){
        /* With detail=full the column number is encoded within the
        ** position. With detail=columns the position is the column. */
        int iFts5Col = -1;
        if( eDetail==FTS5_DETAIL_FULL ){
          iFts5Col = FTS5_POS2COLUMN(pCsr->iInstPos);
        }else if( eDetail==FTS5_DETAIL_COLUMNS ){
          iFts5Col = (int)pCsr->iInstPos;
        }
        if( iFts5Col>=0 && iFts5Col<pConfig->nCol ){
          const char *zName = pConfig->azCol[iFts5Col];
          sqlite3_result_text(pCtx, zName, -1, SQLITE_STATIC);
        }
      }else{
        assert( iCol==3 );
        if( eDetail==FTS5_DETAIL_FULL ){
          sqlite3_result_int(pCtx, FTS5_POS2OFFSET(pCsr->iInstPos));
        }
      }
      break;
  }

  if( nVal>0 ) sqlite3_result_int64(pCtx, nVal);
  return SQLITE_OK;
}
746 
747 /*
748 ** This is the xRowid method. The SQLite core calls this routine to
749 ** retrieve the rowid for the current row of the result set. The
750 ** rowid should be written to *pRowid.
751 */
fts5VocabRowidMethod(sqlite3_vtab_cursor * pCursor,sqlite_int64 * pRowid)752 static int fts5VocabRowidMethod(
753   sqlite3_vtab_cursor *pCursor,
754   sqlite_int64 *pRowid
755 ){
756   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
757   *pRowid = pCsr->rowid;
758   return SQLITE_OK;
759 }
760 
/*
** Register the "fts5vocab" virtual table module with database handle db.
** pGlobal is installed as the module's client data, so that it is passed
** back as the pAux argument of xCreate/xConnect. No destructor is
** registered for it.
**
** The module is read-only: it has no xUpdate method and none of the
** transaction, rename or savepoint methods.
**
** Returns the value returned by sqlite3_create_module_v2().
*/
int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
  static const sqlite3_module vocabModule = {
    /* iVersion      */ 2,
    /* xCreate       */ fts5VocabCreateMethod,
    /* xConnect      */ fts5VocabConnectMethod,
    /* xBestIndex    */ fts5VocabBestIndexMethod,
    /* xDisconnect   */ fts5VocabDisconnectMethod,
    /* xDestroy      */ fts5VocabDestroyMethod,
    /* xOpen         */ fts5VocabOpenMethod,
    /* xClose        */ fts5VocabCloseMethod,
    /* xFilter       */ fts5VocabFilterMethod,
    /* xNext         */ fts5VocabNextMethod,
    /* xEof          */ fts5VocabEofMethod,
    /* xColumn       */ fts5VocabColumnMethod,
    /* xRowid        */ fts5VocabRowidMethod,
    /* xUpdate       */ 0,
    /* xBegin        */ 0,
    /* xSync         */ 0,
    /* xCommit       */ 0,
    /* xRollback     */ 0,
    /* xFindFunction */ 0,
    /* xRename       */ 0,
    /* xSavepoint    */ 0,
    /* xRelease      */ 0,
    /* xRollbackTo   */ 0,
    /* xShadowName   */ 0
  };

  return sqlite3_create_module_v2(
      db, "fts5vocab", &vocabModule, (void*)pGlobal, 0
  );
}
792