xref: /sqlite-3.40.0/ext/fts3/fts3_aux.c (revision 156d4410)
1 /*
2 ** 2011 Jan 27
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 */
14 #include "fts3Int.h"
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
16 
17 #include <string.h>
18 #include <assert.h>
19 
20 typedef struct Fts3auxTable Fts3auxTable;
21 typedef struct Fts3auxCursor Fts3auxCursor;
22 
23 struct Fts3auxTable {
24   sqlite3_vtab base;              /* Base class used by SQLite core */
25   Fts3Table *pFts3Tab;
26 };
27 
28 struct Fts3auxCursor {
29   sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
30   Fts3MultiSegReader csr;        /* Must be right after "base" */
31   Fts3SegFilter filter;
32   char *zStop;
33   int nStop;                      /* Byte-length of string zStop */
34   int iLangid;                    /* Language id to query */
35   int isEof;                      /* True if cursor is at EOF */
36   sqlite3_int64 iRowid;           /* Current rowid */
37 
38   int iCol;                       /* Current value of 'col' column */
39   int nStat;                      /* Size of aStat[] array */
40   struct Fts3auxColstats {
41     sqlite3_int64 nDoc;           /* 'documents' values for current csr row */
42     sqlite3_int64 nOcc;           /* 'occurrences' values for current csr row */
43   } *aStat;
44 };
45 
/*
** Schema declared for every fts4aux table. The column indexes used
** throughout this file follow from it:
**
**     0: term   1: col   2: documents   3: occurrences   4: languageid
*/
#define FTS3_AUX_SCHEMA                                                 \
  "CREATE TABLE x(term, col, documents, occurrences, languageid HIDDEN)"
51 
52 /*
53 ** This function does all the work for both the xConnect and xCreate methods.
54 ** These tables have no persistent representation of their own, so xConnect
55 ** and xCreate are identical operations.
56 */
fts3auxConnectMethod(sqlite3 * db,void * pUnused,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)57 static int fts3auxConnectMethod(
58   sqlite3 *db,                    /* Database connection */
59   void *pUnused,                  /* Unused */
60   int argc,                       /* Number of elements in argv array */
61   const char * const *argv,       /* xCreate/xConnect argument array */
62   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
63   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
64 ){
65   char const *zDb;                /* Name of database (e.g. "main") */
66   char const *zFts3;              /* Name of fts3 table */
67   int nDb;                        /* Result of strlen(zDb) */
68   int nFts3;                      /* Result of strlen(zFts3) */
69   sqlite3_int64 nByte;            /* Bytes of space to allocate here */
70   int rc;                         /* value returned by declare_vtab() */
71   Fts3auxTable *p;                /* Virtual table object to return */
72 
73   UNUSED_PARAMETER(pUnused);
74 
75   /* The user should invoke this in one of two forms:
76   **
77   **     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table);
78   **     CREATE VIRTUAL TABLE xxx USING fts4aux(fts4-table-db, fts4-table);
79   */
80   if( argc!=4 && argc!=5 ) goto bad_args;
81 
82   zDb = argv[1];
83   nDb = (int)strlen(zDb);
84   if( argc==5 ){
85     if( nDb==4 && 0==sqlite3_strnicmp("temp", zDb, 4) ){
86       zDb = argv[3];
87       nDb = (int)strlen(zDb);
88       zFts3 = argv[4];
89     }else{
90       goto bad_args;
91     }
92   }else{
93     zFts3 = argv[3];
94   }
95   nFts3 = (int)strlen(zFts3);
96 
97   rc = sqlite3_declare_vtab(db, FTS3_AUX_SCHEMA);
98   if( rc!=SQLITE_OK ) return rc;
99 
100   nByte = sizeof(Fts3auxTable) + sizeof(Fts3Table) + nDb + nFts3 + 2;
101   p = (Fts3auxTable *)sqlite3_malloc64(nByte);
102   if( !p ) return SQLITE_NOMEM;
103   memset(p, 0, nByte);
104 
105   p->pFts3Tab = (Fts3Table *)&p[1];
106   p->pFts3Tab->zDb = (char *)&p->pFts3Tab[1];
107   p->pFts3Tab->zName = &p->pFts3Tab->zDb[nDb+1];
108   p->pFts3Tab->db = db;
109   p->pFts3Tab->nIndex = 1;
110 
111   memcpy((char *)p->pFts3Tab->zDb, zDb, nDb);
112   memcpy((char *)p->pFts3Tab->zName, zFts3, nFts3);
113   sqlite3Fts3Dequote((char *)p->pFts3Tab->zName);
114 
115   *ppVtab = (sqlite3_vtab *)p;
116   return SQLITE_OK;
117 
118  bad_args:
119   sqlite3Fts3ErrMsg(pzErr, "invalid arguments to fts4aux constructor");
120   return SQLITE_ERROR;
121 }
122 
123 /*
124 ** This function does the work for both the xDisconnect and xDestroy methods.
125 ** These tables have no persistent representation of their own, so xDisconnect
126 ** and xDestroy are identical operations.
127 */
fts3auxDisconnectMethod(sqlite3_vtab * pVtab)128 static int fts3auxDisconnectMethod(sqlite3_vtab *pVtab){
129   Fts3auxTable *p = (Fts3auxTable *)pVtab;
130   Fts3Table *pFts3 = p->pFts3Tab;
131   int i;
132 
133   /* Free any prepared statements held */
134   for(i=0; i<SizeofArray(pFts3->aStmt); i++){
135     sqlite3_finalize(pFts3->aStmt[i]);
136   }
137   sqlite3_free(pFts3->zSegmentsTbl);
138   sqlite3_free(p);
139   return SQLITE_OK;
140 }
141 
/*
** Bit flags stored in sqlite3_index_info.idxNum by xBestIndex and decoded
** by xFilter. They record which constraints on the "term" column have a
** value in the xFilter argument array. EQ is only ever used on its own;
** GE and LE may be combined.
*/
#define FTS4AUX_EQ_CONSTRAINT 0x01    /* term = ?             */
#define FTS4AUX_GE_CONSTRAINT 0x02    /* term >= ? (or > ?)   */
#define FTS4AUX_LE_CONSTRAINT 0x04    /* term <= ? (or < ?)   */
145 
146 /*
147 ** xBestIndex - Analyze a WHERE and ORDER BY clause.
148 */
fts3auxBestIndexMethod(sqlite3_vtab * pVTab,sqlite3_index_info * pInfo)149 static int fts3auxBestIndexMethod(
150   sqlite3_vtab *pVTab,
151   sqlite3_index_info *pInfo
152 ){
153   int i;
154   int iEq = -1;
155   int iGe = -1;
156   int iLe = -1;
157   int iLangid = -1;
158   int iNext = 1;                  /* Next free argvIndex value */
159 
160   UNUSED_PARAMETER(pVTab);
161 
162   /* This vtab delivers always results in "ORDER BY term ASC" order. */
163   if( pInfo->nOrderBy==1
164    && pInfo->aOrderBy[0].iColumn==0
165    && pInfo->aOrderBy[0].desc==0
166   ){
167     pInfo->orderByConsumed = 1;
168   }
169 
170   /* Search for equality and range constraints on the "term" column.
171   ** And equality constraints on the hidden "languageid" column. */
172   for(i=0; i<pInfo->nConstraint; i++){
173     if( pInfo->aConstraint[i].usable ){
174       int op = pInfo->aConstraint[i].op;
175       int iCol = pInfo->aConstraint[i].iColumn;
176 
177       if( iCol==0 ){
178         if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iEq = i;
179         if( op==SQLITE_INDEX_CONSTRAINT_LT ) iLe = i;
180         if( op==SQLITE_INDEX_CONSTRAINT_LE ) iLe = i;
181         if( op==SQLITE_INDEX_CONSTRAINT_GT ) iGe = i;
182         if( op==SQLITE_INDEX_CONSTRAINT_GE ) iGe = i;
183       }
184       if( iCol==4 ){
185         if( op==SQLITE_INDEX_CONSTRAINT_EQ ) iLangid = i;
186       }
187     }
188   }
189 
190   if( iEq>=0 ){
191     pInfo->idxNum = FTS4AUX_EQ_CONSTRAINT;
192     pInfo->aConstraintUsage[iEq].argvIndex = iNext++;
193     pInfo->estimatedCost = 5;
194   }else{
195     pInfo->idxNum = 0;
196     pInfo->estimatedCost = 20000;
197     if( iGe>=0 ){
198       pInfo->idxNum += FTS4AUX_GE_CONSTRAINT;
199       pInfo->aConstraintUsage[iGe].argvIndex = iNext++;
200       pInfo->estimatedCost /= 2;
201     }
202     if( iLe>=0 ){
203       pInfo->idxNum += FTS4AUX_LE_CONSTRAINT;
204       pInfo->aConstraintUsage[iLe].argvIndex = iNext++;
205       pInfo->estimatedCost /= 2;
206     }
207   }
208   if( iLangid>=0 ){
209     pInfo->aConstraintUsage[iLangid].argvIndex = iNext++;
210     pInfo->estimatedCost--;
211   }
212 
213   return SQLITE_OK;
214 }
215 
216 /*
217 ** xOpen - Open a cursor.
218 */
fts3auxOpenMethod(sqlite3_vtab * pVTab,sqlite3_vtab_cursor ** ppCsr)219 static int fts3auxOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
220   Fts3auxCursor *pCsr;            /* Pointer to cursor object to return */
221 
222   UNUSED_PARAMETER(pVTab);
223 
224   pCsr = (Fts3auxCursor *)sqlite3_malloc(sizeof(Fts3auxCursor));
225   if( !pCsr ) return SQLITE_NOMEM;
226   memset(pCsr, 0, sizeof(Fts3auxCursor));
227 
228   *ppCsr = (sqlite3_vtab_cursor *)pCsr;
229   return SQLITE_OK;
230 }
231 
232 /*
233 ** xClose - Close a cursor.
234 */
fts3auxCloseMethod(sqlite3_vtab_cursor * pCursor)235 static int fts3auxCloseMethod(sqlite3_vtab_cursor *pCursor){
236   Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
237   Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
238 
239   sqlite3Fts3SegmentsClose(pFts3);
240   sqlite3Fts3SegReaderFinish(&pCsr->csr);
241   sqlite3_free((void *)pCsr->filter.zTerm);
242   sqlite3_free(pCsr->zStop);
243   sqlite3_free(pCsr->aStat);
244   sqlite3_free(pCsr);
245   return SQLITE_OK;
246 }
247 
/*
** Make sure pCsr->aStat[] has room for at least nSize entries. Entries
** that already exist keep their values; newly added entries are zeroed.
** The array is never shrunk.
**
** Returns SQLITE_OK on success. If the reallocation fails SQLITE_NOMEM is
** returned and the cursor is left unchanged.
*/
static int fts3auxGrowStatArray(Fts3auxCursor *pCsr, int nSize){
  struct Fts3auxColstats *aGrown; /* The reallocated array */
  int nOld = pCsr->nStat;         /* Number of entries already present */

  if( nSize<=nOld ){
    return SQLITE_OK;
  }

  aGrown = (struct Fts3auxColstats *)sqlite3_realloc64(
      pCsr->aStat, sizeof(struct Fts3auxColstats) * nSize
  );
  if( aGrown==0 ){
    return SQLITE_NOMEM;
  }

  memset(&aGrown[nOld], 0, sizeof(struct Fts3auxColstats) * (nSize - nOld));
  pCsr->aStat = aGrown;
  pCsr->nStat = nSize;
  return SQLITE_OK;
}
263 
264 /*
265 ** xNext - Advance the cursor to the next row, if any.
266 */
fts3auxNextMethod(sqlite3_vtab_cursor * pCursor)267 static int fts3auxNextMethod(sqlite3_vtab_cursor *pCursor){
268   Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
269   Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
270   int rc;
271 
272   /* Increment our pretend rowid value. */
273   pCsr->iRowid++;
274 
275   for(pCsr->iCol++; pCsr->iCol<pCsr->nStat; pCsr->iCol++){
276     if( pCsr->aStat[pCsr->iCol].nDoc>0 ) return SQLITE_OK;
277   }
278 
279   rc = sqlite3Fts3SegReaderStep(pFts3, &pCsr->csr);
280   if( rc==SQLITE_ROW ){
281     int i = 0;
282     int nDoclist = pCsr->csr.nDoclist;
283     char *aDoclist = pCsr->csr.aDoclist;
284     int iCol;
285 
286     int eState = 0;
287 
288     if( pCsr->zStop ){
289       int n = (pCsr->nStop<pCsr->csr.nTerm) ? pCsr->nStop : pCsr->csr.nTerm;
290       int mc = memcmp(pCsr->zStop, pCsr->csr.zTerm, n);
291       if( mc<0 || (mc==0 && pCsr->csr.nTerm>pCsr->nStop) ){
292         pCsr->isEof = 1;
293         return SQLITE_OK;
294       }
295     }
296 
297     if( fts3auxGrowStatArray(pCsr, 2) ) return SQLITE_NOMEM;
298     memset(pCsr->aStat, 0, sizeof(struct Fts3auxColstats) * pCsr->nStat);
299     iCol = 0;
300     rc = SQLITE_OK;
301 
302     while( i<nDoclist ){
303       sqlite3_int64 v = 0;
304 
305       i += sqlite3Fts3GetVarint(&aDoclist[i], &v);
306       switch( eState ){
307         /* State 0. In this state the integer just read was a docid. */
308         case 0:
309           pCsr->aStat[0].nDoc++;
310           eState = 1;
311           iCol = 0;
312           break;
313 
314         /* State 1. In this state we are expecting either a 1, indicating
315         ** that the following integer will be a column number, or the
316         ** start of a position list for column 0.
317         **
318         ** The only difference between state 1 and state 2 is that if the
319         ** integer encountered in state 1 is not 0 or 1, then we need to
320         ** increment the column 0 "nDoc" count for this term.
321         */
322         case 1:
323           assert( iCol==0 );
324           if( v>1 ){
325             pCsr->aStat[1].nDoc++;
326           }
327           eState = 2;
328           /* fall through */
329 
330         case 2:
331           if( v==0 ){       /* 0x00. Next integer will be a docid. */
332             eState = 0;
333           }else if( v==1 ){ /* 0x01. Next integer will be a column number. */
334             eState = 3;
335           }else{            /* 2 or greater. A position. */
336             pCsr->aStat[iCol+1].nOcc++;
337             pCsr->aStat[0].nOcc++;
338           }
339           break;
340 
341         /* State 3. The integer just read is a column number. */
342         default: assert( eState==3 );
343           iCol = (int)v;
344           if( iCol<1 ){
345             rc = SQLITE_CORRUPT_VTAB;
346             break;
347           }
348           if( fts3auxGrowStatArray(pCsr, iCol+2) ) return SQLITE_NOMEM;
349           pCsr->aStat[iCol+1].nDoc++;
350           eState = 2;
351           break;
352       }
353     }
354 
355     pCsr->iCol = 0;
356   }else{
357     pCsr->isEof = 1;
358   }
359   return rc;
360 }
361 
362 /*
363 ** xFilter - Initialize a cursor to point at the start of its data.
364 */
fts3auxFilterMethod(sqlite3_vtab_cursor * pCursor,int idxNum,const char * idxStr,int nVal,sqlite3_value ** apVal)365 static int fts3auxFilterMethod(
366   sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
367   int idxNum,                     /* Strategy index */
368   const char *idxStr,             /* Unused */
369   int nVal,                       /* Number of elements in apVal */
370   sqlite3_value **apVal           /* Arguments for the indexing scheme */
371 ){
372   Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
373   Fts3Table *pFts3 = ((Fts3auxTable *)pCursor->pVtab)->pFts3Tab;
374   int rc;
375   int isScan = 0;
376   int iLangVal = 0;               /* Language id to query */
377 
378   int iEq = -1;                   /* Index of term=? value in apVal */
379   int iGe = -1;                   /* Index of term>=? value in apVal */
380   int iLe = -1;                   /* Index of term<=? value in apVal */
381   int iLangid = -1;               /* Index of languageid=? value in apVal */
382   int iNext = 0;
383 
384   UNUSED_PARAMETER(nVal);
385   UNUSED_PARAMETER(idxStr);
386 
387   assert( idxStr==0 );
388   assert( idxNum==FTS4AUX_EQ_CONSTRAINT || idxNum==0
389        || idxNum==FTS4AUX_LE_CONSTRAINT || idxNum==FTS4AUX_GE_CONSTRAINT
390        || idxNum==(FTS4AUX_LE_CONSTRAINT|FTS4AUX_GE_CONSTRAINT)
391   );
392 
393   if( idxNum==FTS4AUX_EQ_CONSTRAINT ){
394     iEq = iNext++;
395   }else{
396     isScan = 1;
397     if( idxNum & FTS4AUX_GE_CONSTRAINT ){
398       iGe = iNext++;
399     }
400     if( idxNum & FTS4AUX_LE_CONSTRAINT ){
401       iLe = iNext++;
402     }
403   }
404   if( iNext<nVal ){
405     iLangid = iNext++;
406   }
407 
408   /* In case this cursor is being reused, close and zero it. */
409   testcase(pCsr->filter.zTerm);
410   sqlite3Fts3SegReaderFinish(&pCsr->csr);
411   sqlite3_free((void *)pCsr->filter.zTerm);
412   sqlite3_free(pCsr->aStat);
413   sqlite3_free(pCsr->zStop);
414   memset(&pCsr->csr, 0, ((u8*)&pCsr[1]) - (u8*)&pCsr->csr);
415 
416   pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
417   if( isScan ) pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
418 
419   if( iEq>=0 || iGe>=0 ){
420     const unsigned char *zStr = sqlite3_value_text(apVal[0]);
421     assert( (iEq==0 && iGe==-1) || (iEq==-1 && iGe==0) );
422     if( zStr ){
423       pCsr->filter.zTerm = sqlite3_mprintf("%s", zStr);
424       if( pCsr->filter.zTerm==0 ) return SQLITE_NOMEM;
425       pCsr->filter.nTerm = (int)strlen(pCsr->filter.zTerm);
426     }
427   }
428 
429   if( iLe>=0 ){
430     pCsr->zStop = sqlite3_mprintf("%s", sqlite3_value_text(apVal[iLe]));
431     if( pCsr->zStop==0 ) return SQLITE_NOMEM;
432     pCsr->nStop = (int)strlen(pCsr->zStop);
433   }
434 
435   if( iLangid>=0 ){
436     iLangVal = sqlite3_value_int(apVal[iLangid]);
437 
438     /* If the user specified a negative value for the languageid, use zero
439     ** instead. This works, as the "languageid=?" constraint will also
440     ** be tested by the VDBE layer. The test will always be false (since
441     ** this module will not return a row with a negative languageid), and
442     ** so the overall query will return zero rows.  */
443     if( iLangVal<0 ) iLangVal = 0;
444   }
445   pCsr->iLangid = iLangVal;
446 
447   rc = sqlite3Fts3SegReaderCursor(pFts3, iLangVal, 0, FTS3_SEGCURSOR_ALL,
448       pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
449   );
450   if( rc==SQLITE_OK ){
451     rc = sqlite3Fts3SegReaderStart(pFts3, &pCsr->csr, &pCsr->filter);
452   }
453 
454   if( rc==SQLITE_OK ) rc = fts3auxNextMethod(pCursor);
455   return rc;
456 }
457 
458 /*
459 ** xEof - Return true if the cursor is at EOF, or false otherwise.
460 */
fts3auxEofMethod(sqlite3_vtab_cursor * pCursor)461 static int fts3auxEofMethod(sqlite3_vtab_cursor *pCursor){
462   Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
463   return pCsr->isEof;
464 }
465 
466 /*
467 ** xColumn - Return a column value.
468 */
fts3auxColumnMethod(sqlite3_vtab_cursor * pCursor,sqlite3_context * pCtx,int iCol)469 static int fts3auxColumnMethod(
470   sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
471   sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
472   int iCol                        /* Index of column to read value from */
473 ){
474   Fts3auxCursor *p = (Fts3auxCursor *)pCursor;
475 
476   assert( p->isEof==0 );
477   switch( iCol ){
478     case 0: /* term */
479       sqlite3_result_text(pCtx, p->csr.zTerm, p->csr.nTerm, SQLITE_TRANSIENT);
480       break;
481 
482     case 1: /* col */
483       if( p->iCol ){
484         sqlite3_result_int(pCtx, p->iCol-1);
485       }else{
486         sqlite3_result_text(pCtx, "*", -1, SQLITE_STATIC);
487       }
488       break;
489 
490     case 2: /* documents */
491       sqlite3_result_int64(pCtx, p->aStat[p->iCol].nDoc);
492       break;
493 
494     case 3: /* occurrences */
495       sqlite3_result_int64(pCtx, p->aStat[p->iCol].nOcc);
496       break;
497 
498     default: /* languageid */
499       assert( iCol==4 );
500       sqlite3_result_int(pCtx, p->iLangid);
501       break;
502   }
503 
504   return SQLITE_OK;
505 }
506 
507 /*
508 ** xRowid - Return the current rowid for the cursor.
509 */
fts3auxRowidMethod(sqlite3_vtab_cursor * pCursor,sqlite_int64 * pRowid)510 static int fts3auxRowidMethod(
511   sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
512   sqlite_int64 *pRowid            /* OUT: Rowid value */
513 ){
514   Fts3auxCursor *pCsr = (Fts3auxCursor *)pCursor;
515   *pRowid = pCsr->iRowid;
516   return SQLITE_OK;
517 }
518 
519 /*
520 ** Register the fts3aux module with database connection db. Return SQLITE_OK
521 ** if successful or an error code if sqlite3_create_module() fails.
522 */
sqlite3Fts3InitAux(sqlite3 * db)523 int sqlite3Fts3InitAux(sqlite3 *db){
524   static const sqlite3_module fts3aux_module = {
525      0,                           /* iVersion      */
526      fts3auxConnectMethod,        /* xCreate       */
527      fts3auxConnectMethod,        /* xConnect      */
528      fts3auxBestIndexMethod,      /* xBestIndex    */
529      fts3auxDisconnectMethod,     /* xDisconnect   */
530      fts3auxDisconnectMethod,     /* xDestroy      */
531      fts3auxOpenMethod,           /* xOpen         */
532      fts3auxCloseMethod,          /* xClose        */
533      fts3auxFilterMethod,         /* xFilter       */
534      fts3auxNextMethod,           /* xNext         */
535      fts3auxEofMethod,            /* xEof          */
536      fts3auxColumnMethod,         /* xColumn       */
537      fts3auxRowidMethod,          /* xRowid        */
538      0,                           /* xUpdate       */
539      0,                           /* xBegin        */
540      0,                           /* xSync         */
541      0,                           /* xCommit       */
542      0,                           /* xRollback     */
543      0,                           /* xFindFunction */
544      0,                           /* xRename       */
545      0,                           /* xSavepoint    */
546      0,                           /* xRelease      */
547      0,                           /* xRollbackTo   */
548      0                            /* xShadowName   */
549   };
550   int rc;                         /* Return code */
551 
552   rc = sqlite3_create_module(db, "fts4aux", &fts3aux_module, 0);
553   return rc;
554 }
555 
556 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
557