xref: /sqlite-3.40.0/ext/fts5/fts5_expr.c (revision c381056e)
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 */
14 
15 
16 
17 #include "fts5Int.h"
18 #include "fts5parse.h"
19 
20 /*
21 ** All token types in the generated fts5parse.h file are greater than 0.
22 */
23 #define FTS5_EOF 0
24 
25 #define FTS5_LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
26 
27 typedef struct Fts5ExprTerm Fts5ExprTerm;
28 
29 /*
30 ** Functions generated by lemon from fts5parse.y.
31 */
32 void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
33 void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
34 void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
35 #ifndef NDEBUG
36 #include <stdio.h>
37 void sqlite3Fts5ParserTrace(FILE*, char*);
38 #endif
39 int sqlite3Fts5ParserFallback(int);
40 
41 
42 struct Fts5Expr {
43   Fts5Index *pIndex;
44   Fts5Config *pConfig;
45   Fts5ExprNode *pRoot;
46   int bDesc;                      /* Iterate in descending rowid order */
47   int nPhrase;                    /* Number of phrases in expression */
48   Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
49 };
50 
51 /*
52 ** eType:
53 **   Expression node type. Always one of:
54 **
55 **       FTS5_AND                 (nChild, apChild valid)
56 **       FTS5_OR                  (nChild, apChild valid)
57 **       FTS5_NOT                 (nChild, apChild valid)
58 **       FTS5_STRING              (pNear valid)
59 **       FTS5_TERM                (pNear valid)
60 */
61 struct Fts5ExprNode {
62   int eType;                      /* Node type */
63   int bEof;                       /* True at EOF */
64   int bNomatch;                   /* True if entry is not a match */
65 
66   /* Next method for this node. */
67   int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);
68 
69   i64 iRowid;                     /* Current rowid */
70   Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */
71 
72   /* Child nodes. For a NOT node, this array always contains 2 entries. For
73   ** AND or OR nodes, it contains 2 or more entries.  */
74   int nChild;                     /* Number of child nodes */
75   Fts5ExprNode *apChild[1];       /* Array of child nodes */
76 };
77 
78 #define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)
79 
80 /*
81 ** Invoke the xNext method of an Fts5ExprNode object. This macro should be
82 ** used as if it has the same signature as the xNext() methods themselves.
83 */
84 #define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
85 
86 /*
87 ** An instance of the following structure represents a single search term
88 ** or term prefix.
89 */
90 struct Fts5ExprTerm {
91   u8 bPrefix;                     /* True for a prefix term */
92   u8 bFirst;                      /* True if token must be first in column */
93   char *zTerm;                    /* nul-terminated term */
94   Fts5IndexIter *pIter;           /* Iterator for this term */
95   Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
96 };
97 
98 /*
99 ** A phrase. One or more terms that must appear in a contiguous sequence
100 ** within a document for it to match.
101 */
102 struct Fts5ExprPhrase {
103   Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
104   Fts5Buffer poslist;             /* Current position list */
105   int nTerm;                      /* Number of entries in aTerm[] */
106   Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
107 };
108 
109 /*
110 ** One or more phrases that must appear within a certain token distance of
111 ** each other within each matching document.
112 */
113 struct Fts5ExprNearset {
114   int nNear;                      /* NEAR parameter */
115   Fts5Colset *pColset;            /* Columns to search (NULL -> all columns) */
116   int nPhrase;                    /* Number of entries in aPhrase[] array */
117   Fts5ExprPhrase *apPhrase[1];    /* Array of phrase pointers */
118 };
119 
120 
121 /*
122 ** Parse context.
123 */
124 struct Fts5Parse {
125   Fts5Config *pConfig;
126   char *zErr;
127   int rc;
128   int nPhrase;                    /* Size of apPhrase array */
129   Fts5ExprPhrase **apPhrase;      /* Array of all phrases */
130   Fts5ExprNode *pExpr;            /* Result of a successful parse */
131   int bPhraseToAnd;               /* Convert "a+b" to "a AND b" */
132 };
133 
sqlite3Fts5ParseError(Fts5Parse * pParse,const char * zFmt,...)134 void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
135   va_list ap;
136   va_start(ap, zFmt);
137   if( pParse->rc==SQLITE_OK ){
138     assert( pParse->zErr==0 );
139     pParse->zErr = sqlite3_vmprintf(zFmt, ap);
140     pParse->rc = SQLITE_ERROR;
141   }
142   va_end(ap);
143 }
144 
fts5ExprIsspace(char t)145 static int fts5ExprIsspace(char t){
146   return t==' ' || t=='\t' || t=='\n' || t=='\r';
147 }
148 
149 /*
150 ** Read the first token from the nul-terminated string at *pz.
151 */
fts5ExprGetToken(Fts5Parse * pParse,const char ** pz,Fts5Token * pToken)152 static int fts5ExprGetToken(
153   Fts5Parse *pParse,
154   const char **pz,                /* IN/OUT: Pointer into buffer */
155   Fts5Token *pToken
156 ){
157   const char *z = *pz;
158   int tok;
159 
160   /* Skip past any whitespace */
161   while( fts5ExprIsspace(*z) ) z++;
162 
163   pToken->p = z;
164   pToken->n = 1;
165   switch( *z ){
166     case '(':  tok = FTS5_LP;    break;
167     case ')':  tok = FTS5_RP;    break;
168     case '{':  tok = FTS5_LCP;   break;
169     case '}':  tok = FTS5_RCP;   break;
170     case ':':  tok = FTS5_COLON; break;
171     case ',':  tok = FTS5_COMMA; break;
172     case '+':  tok = FTS5_PLUS;  break;
173     case '*':  tok = FTS5_STAR;  break;
174     case '-':  tok = FTS5_MINUS; break;
175     case '^':  tok = FTS5_CARET; break;
176     case '\0': tok = FTS5_EOF;   break;
177 
178     case '"': {
179       const char *z2;
180       tok = FTS5_STRING;
181 
182       for(z2=&z[1]; 1; z2++){
183         if( z2[0]=='"' ){
184           z2++;
185           if( z2[0]!='"' ) break;
186         }
187         if( z2[0]=='\0' ){
188           sqlite3Fts5ParseError(pParse, "unterminated string");
189           return FTS5_EOF;
190         }
191       }
192       pToken->n = (z2 - z);
193       break;
194     }
195 
196     default: {
197       const char *z2;
198       if( sqlite3Fts5IsBareword(z[0])==0 ){
199         sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
200         return FTS5_EOF;
201       }
202       tok = FTS5_STRING;
203       for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
204       pToken->n = (z2 - z);
205       if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
206       if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
207       if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
208       break;
209     }
210   }
211 
212   *pz = &pToken->p[pToken->n];
213   return tok;
214 }
215 
fts5ParseAlloc(u64 t)216 static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64((sqlite3_int64)t);}
fts5ParseFree(void * p)217 static void fts5ParseFree(void *p){ sqlite3_free(p); }
218 
sqlite3Fts5ExprNew(Fts5Config * pConfig,int bPhraseToAnd,int iCol,const char * zExpr,Fts5Expr ** ppNew,char ** pzErr)219 int sqlite3Fts5ExprNew(
220   Fts5Config *pConfig,            /* FTS5 Configuration */
221   int bPhraseToAnd,
222   int iCol,
223   const char *zExpr,              /* Expression text */
224   Fts5Expr **ppNew,
225   char **pzErr
226 ){
227   Fts5Parse sParse;
228   Fts5Token token;
229   const char *z = zExpr;
230   int t;                          /* Next token type */
231   void *pEngine;
232   Fts5Expr *pNew;
233 
234   *ppNew = 0;
235   *pzErr = 0;
236   memset(&sParse, 0, sizeof(sParse));
237   sParse.bPhraseToAnd = bPhraseToAnd;
238   pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
239   if( pEngine==0 ){ return SQLITE_NOMEM; }
240   sParse.pConfig = pConfig;
241 
242   do {
243     t = fts5ExprGetToken(&sParse, &z, &token);
244     sqlite3Fts5Parser(pEngine, t, token, &sParse);
245   }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
246   sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
247 
248   /* If the LHS of the MATCH expression was a user column, apply the
249   ** implicit column-filter.  */
250   if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){
251     int n = sizeof(Fts5Colset);
252     Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n);
253     if( pColset ){
254       pColset->nCol = 1;
255       pColset->aiCol[0] = iCol;
256       sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset);
257     }
258   }
259 
260   assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
261   if( sParse.rc==SQLITE_OK ){
262     *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
263     if( pNew==0 ){
264       sParse.rc = SQLITE_NOMEM;
265       sqlite3Fts5ParseNodeFree(sParse.pExpr);
266     }else{
267       if( !sParse.pExpr ){
268         const int nByte = sizeof(Fts5ExprNode);
269         pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte);
270         if( pNew->pRoot ){
271           pNew->pRoot->bEof = 1;
272         }
273       }else{
274         pNew->pRoot = sParse.pExpr;
275       }
276       pNew->pIndex = 0;
277       pNew->pConfig = pConfig;
278       pNew->apExprPhrase = sParse.apPhrase;
279       pNew->nPhrase = sParse.nPhrase;
280       pNew->bDesc = 0;
281       sParse.apPhrase = 0;
282     }
283   }else{
284     sqlite3Fts5ParseNodeFree(sParse.pExpr);
285   }
286 
287   sqlite3_free(sParse.apPhrase);
288   *pzErr = sParse.zErr;
289   return sParse.rc;
290 }
291 
292 /*
293 ** This function is only called when using the special 'trigram' tokenizer.
294 ** Argument zText contains the text of a LIKE or GLOB pattern matched
295 ** against column iCol. This function creates and compiles an FTS5 MATCH
296 ** expression that will match a superset of the rows matched by the LIKE or
297 ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error
298 ** code.
299 */
sqlite3Fts5ExprPattern(Fts5Config * pConfig,int bGlob,int iCol,const char * zText,Fts5Expr ** pp)300 int sqlite3Fts5ExprPattern(
301   Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp
302 ){
303   i64 nText = strlen(zText);
304   char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1);
305   int rc = SQLITE_OK;
306 
307   if( zExpr==0 ){
308     rc = SQLITE_NOMEM;
309   }else{
310     char aSpec[3];
311     int iOut = 0;
312     int i = 0;
313     int iFirst = 0;
314 
315     if( bGlob==0 ){
316       aSpec[0] = '_';
317       aSpec[1] = '%';
318       aSpec[2] = 0;
319     }else{
320       aSpec[0] = '*';
321       aSpec[1] = '?';
322       aSpec[2] = '[';
323     }
324 
325     while( i<=nText ){
326       if( i==nText
327        || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2]
328       ){
329         if( i-iFirst>=3 ){
330           int jj;
331           zExpr[iOut++] = '"';
332           for(jj=iFirst; jj<i; jj++){
333             zExpr[iOut++] = zText[jj];
334             if( zText[jj]=='"' ) zExpr[iOut++] = '"';
335           }
336           zExpr[iOut++] = '"';
337           zExpr[iOut++] = ' ';
338         }
339         if( zText[i]==aSpec[2] ){
340           i += 2;
341           if( zText[i-1]=='^' ) i++;
342           while( i<nText && zText[i]!=']' ) i++;
343         }
344         iFirst = i+1;
345       }
346       i++;
347     }
348     if( iOut>0 ){
349       int bAnd = 0;
350       if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
351         bAnd = 1;
352         if( pConfig->eDetail==FTS5_DETAIL_NONE ){
353           iCol = pConfig->nCol;
354         }
355       }
356       zExpr[iOut] = '\0';
357       rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg);
358     }else{
359       *pp = 0;
360     }
361     sqlite3_free(zExpr);
362   }
363 
364   return rc;
365 }
366 
367 /*
368 ** Free the expression node object passed as the only argument.
369 */
sqlite3Fts5ParseNodeFree(Fts5ExprNode * p)370 void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
371   if( p ){
372     int i;
373     for(i=0; i<p->nChild; i++){
374       sqlite3Fts5ParseNodeFree(p->apChild[i]);
375     }
376     sqlite3Fts5ParseNearsetFree(p->pNear);
377     sqlite3_free(p);
378   }
379 }
380 
381 /*
382 ** Free the expression object passed as the only argument.
383 */
sqlite3Fts5ExprFree(Fts5Expr * p)384 void sqlite3Fts5ExprFree(Fts5Expr *p){
385   if( p ){
386     sqlite3Fts5ParseNodeFree(p->pRoot);
387     sqlite3_free(p->apExprPhrase);
388     sqlite3_free(p);
389   }
390 }
391 
sqlite3Fts5ExprAnd(Fts5Expr ** pp1,Fts5Expr * p2)392 int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){
393   Fts5Parse sParse;
394   memset(&sParse, 0, sizeof(sParse));
395 
396   if( *pp1 ){
397     Fts5Expr *p1 = *pp1;
398     int nPhrase = p1->nPhrase + p2->nPhrase;
399 
400     p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND, p1->pRoot, p2->pRoot,0);
401     p2->pRoot = 0;
402 
403     if( sParse.rc==SQLITE_OK ){
404       Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_realloc(
405           p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*)
406       );
407       if( ap==0 ){
408         sParse.rc = SQLITE_NOMEM;
409       }else{
410         int i;
411         memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*));
412         for(i=0; i<p2->nPhrase; i++){
413           ap[i] = p2->apExprPhrase[i];
414         }
415         p1->nPhrase = nPhrase;
416         p1->apExprPhrase = ap;
417       }
418     }
419     sqlite3_free(p2->apExprPhrase);
420     sqlite3_free(p2);
421   }else{
422     *pp1 = p2;
423   }
424 
425   return sParse.rc;
426 }
427 
428 /*
429 ** Argument pTerm must be a synonym iterator. Return the current rowid
430 ** that it points to.
431 */
fts5ExprSynonymRowid(Fts5ExprTerm * pTerm,int bDesc,int * pbEof)432 static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
433   i64 iRet = 0;
434   int bRetValid = 0;
435   Fts5ExprTerm *p;
436 
437   assert( pTerm );
438   assert( pTerm->pSynonym );
439   assert( bDesc==0 || bDesc==1 );
440   for(p=pTerm; p; p=p->pSynonym){
441     if( 0==sqlite3Fts5IterEof(p->pIter) ){
442       i64 iRowid = p->pIter->iRowid;
443       if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
444         iRet = iRowid;
445         bRetValid = 1;
446       }
447     }
448   }
449 
450   if( pbEof && bRetValid==0 ) *pbEof = 1;
451   return iRet;
452 }
453 
454 /*
455 ** Argument pTerm must be a synonym iterator.
456 */
fts5ExprSynonymList(Fts5ExprTerm * pTerm,i64 iRowid,Fts5Buffer * pBuf,u8 ** pa,int * pn)457 static int fts5ExprSynonymList(
458   Fts5ExprTerm *pTerm,
459   i64 iRowid,
460   Fts5Buffer *pBuf,               /* Use this buffer for space if required */
461   u8 **pa, int *pn
462 ){
463   Fts5PoslistReader aStatic[4];
464   Fts5PoslistReader *aIter = aStatic;
465   int nIter = 0;
466   int nAlloc = 4;
467   int rc = SQLITE_OK;
468   Fts5ExprTerm *p;
469 
470   assert( pTerm->pSynonym );
471   for(p=pTerm; p; p=p->pSynonym){
472     Fts5IndexIter *pIter = p->pIter;
473     if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){
474       if( pIter->nData==0 ) continue;
475       if( nIter==nAlloc ){
476         sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
477         Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
478         if( aNew==0 ){
479           rc = SQLITE_NOMEM;
480           goto synonym_poslist_out;
481         }
482         memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
483         nAlloc = nAlloc*2;
484         if( aIter!=aStatic ) sqlite3_free(aIter);
485         aIter = aNew;
486       }
487       sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
488       assert( aIter[nIter].bEof==0 );
489       nIter++;
490     }
491   }
492 
493   if( nIter==1 ){
494     *pa = (u8*)aIter[0].a;
495     *pn = aIter[0].n;
496   }else{
497     Fts5PoslistWriter writer = {0};
498     i64 iPrev = -1;
499     fts5BufferZero(pBuf);
500     while( 1 ){
501       int i;
502       i64 iMin = FTS5_LARGEST_INT64;
503       for(i=0; i<nIter; i++){
504         if( aIter[i].bEof==0 ){
505           if( aIter[i].iPos==iPrev ){
506             if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
507           }
508           if( aIter[i].iPos<iMin ){
509             iMin = aIter[i].iPos;
510           }
511         }
512       }
513       if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
514       rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
515       iPrev = iMin;
516     }
517     if( rc==SQLITE_OK ){
518       *pa = pBuf->p;
519       *pn = pBuf->n;
520     }
521   }
522 
523  synonym_poslist_out:
524   if( aIter!=aStatic ) sqlite3_free(aIter);
525   return rc;
526 }
527 
528 
529 /*
530 ** All individual term iterators in pPhrase are guaranteed to be valid and
531 ** pointing to the same rowid when this function is called. This function
532 ** checks if the current rowid really is a match, and if so populates
533 ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
534 ** is set to true if this is really a match, or false otherwise.
535 **
536 ** SQLITE_OK is returned if an error occurs, or an SQLite error code
537 ** otherwise. It is not considered an error code if the current rowid is
538 ** not a match.
539 */
fts5ExprPhraseIsMatch(Fts5ExprNode * pNode,Fts5ExprPhrase * pPhrase,int * pbMatch)540 static int fts5ExprPhraseIsMatch(
541   Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
542   Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
543   int *pbMatch                    /* OUT: Set to true if really a match */
544 ){
545   Fts5PoslistWriter writer = {0};
546   Fts5PoslistReader aStatic[4];
547   Fts5PoslistReader *aIter = aStatic;
548   int i;
549   int rc = SQLITE_OK;
550   int bFirst = pPhrase->aTerm[0].bFirst;
551 
552   fts5BufferZero(&pPhrase->poslist);
553 
554   /* If the aStatic[] array is not large enough, allocate a large array
555   ** using sqlite3_malloc(). This approach could be improved upon. */
556   if( pPhrase->nTerm>ArraySize(aStatic) ){
557     sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
558     aIter = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
559     if( !aIter ) return SQLITE_NOMEM;
560   }
561   memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
562 
563   /* Initialize a term iterator for each term in the phrase */
564   for(i=0; i<pPhrase->nTerm; i++){
565     Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
566     int n = 0;
567     int bFlag = 0;
568     u8 *a = 0;
569     if( pTerm->pSynonym ){
570       Fts5Buffer buf = {0, 0, 0};
571       rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
572       if( rc ){
573         sqlite3_free(a);
574         goto ismatch_out;
575       }
576       if( a==buf.p ) bFlag = 1;
577     }else{
578       a = (u8*)pTerm->pIter->pData;
579       n = pTerm->pIter->nData;
580     }
581     sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
582     aIter[i].bFlag = (u8)bFlag;
583     if( aIter[i].bEof ) goto ismatch_out;
584   }
585 
586   while( 1 ){
587     int bMatch;
588     i64 iPos = aIter[0].iPos;
589     do {
590       bMatch = 1;
591       for(i=0; i<pPhrase->nTerm; i++){
592         Fts5PoslistReader *pPos = &aIter[i];
593         i64 iAdj = iPos + i;
594         if( pPos->iPos!=iAdj ){
595           bMatch = 0;
596           while( pPos->iPos<iAdj ){
597             if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
598           }
599           if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
600         }
601       }
602     }while( bMatch==0 );
603 
604     /* Append position iPos to the output */
605     if( bFirst==0 || FTS5_POS2OFFSET(iPos)==0 ){
606       rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
607       if( rc!=SQLITE_OK ) goto ismatch_out;
608     }
609 
610     for(i=0; i<pPhrase->nTerm; i++){
611       if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
612     }
613   }
614 
615  ismatch_out:
616   *pbMatch = (pPhrase->poslist.n>0);
617   for(i=0; i<pPhrase->nTerm; i++){
618     if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
619   }
620   if( aIter!=aStatic ) sqlite3_free(aIter);
621   return rc;
622 }
623 
624 typedef struct Fts5LookaheadReader Fts5LookaheadReader;
625 struct Fts5LookaheadReader {
626   const u8 *a;                    /* Buffer containing position list */
627   int n;                          /* Size of buffer a[] in bytes */
628   int i;                          /* Current offset in position list */
629   i64 iPos;                       /* Current position */
630   i64 iLookahead;                 /* Next position */
631 };
632 
633 #define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
634 
fts5LookaheadReaderNext(Fts5LookaheadReader * p)635 static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
636   p->iPos = p->iLookahead;
637   if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
638     p->iLookahead = FTS5_LOOKAHEAD_EOF;
639   }
640   return (p->iPos==FTS5_LOOKAHEAD_EOF);
641 }
642 
fts5LookaheadReaderInit(const u8 * a,int n,Fts5LookaheadReader * p)643 static int fts5LookaheadReaderInit(
644   const u8 *a, int n,             /* Buffer to read position list from */
645   Fts5LookaheadReader *p          /* Iterator object to initialize */
646 ){
647   memset(p, 0, sizeof(Fts5LookaheadReader));
648   p->a = a;
649   p->n = n;
650   fts5LookaheadReaderNext(p);
651   return fts5LookaheadReaderNext(p);
652 }
653 
654 typedef struct Fts5NearTrimmer Fts5NearTrimmer;
655 struct Fts5NearTrimmer {
656   Fts5LookaheadReader reader;     /* Input iterator */
657   Fts5PoslistWriter writer;       /* Writer context */
658   Fts5Buffer *pOut;               /* Output poslist */
659 };
660 
661 /*
662 ** The near-set object passed as the first argument contains more than
663 ** one phrase. All phrases currently point to the same row. The
664 ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
665 ** tests if the current row contains instances of each phrase sufficiently
666 ** close together to meet the NEAR constraint. Non-zero is returned if it
667 ** does, or zero otherwise.
668 **
669 ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
670 ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
671 ** occurs within this function (*pRc) is set accordingly before returning.
672 ** The return value is undefined in both these cases.
673 **
674 ** If no error occurs and non-zero (a match) is returned, the position-list
675 ** of each phrase object is edited to contain only those entries that
676 ** meet the constraint before returning.
677 */
fts5ExprNearIsMatch(int * pRc,Fts5ExprNearset * pNear)678 static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
679   Fts5NearTrimmer aStatic[4];
680   Fts5NearTrimmer *a = aStatic;
681   Fts5ExprPhrase **apPhrase = pNear->apPhrase;
682 
683   int i;
684   int rc = *pRc;
685   int bMatch;
686 
687   assert( pNear->nPhrase>1 );
688 
689   /* If the aStatic[] array is not large enough, allocate a large array
690   ** using sqlite3_malloc(). This approach could be improved upon. */
691   if( pNear->nPhrase>ArraySize(aStatic) ){
692     sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
693     a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
694   }else{
695     memset(aStatic, 0, sizeof(aStatic));
696   }
697   if( rc!=SQLITE_OK ){
698     *pRc = rc;
699     return 0;
700   }
701 
702   /* Initialize a lookahead iterator for each phrase. After passing the
703   ** buffer and buffer size to the lookaside-reader init function, zero
704   ** the phrase poslist buffer. The new poslist for the phrase (containing
705   ** the same entries as the original with some entries removed on account
706   ** of the NEAR constraint) is written over the original even as it is
707   ** being read. This is safe as the entries for the new poslist are a
708   ** subset of the old, so it is not possible for data yet to be read to
709   ** be overwritten.  */
710   for(i=0; i<pNear->nPhrase; i++){
711     Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
712     fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
713     pPoslist->n = 0;
714     a[i].pOut = pPoslist;
715   }
716 
717   while( 1 ){
718     int iAdv;
719     i64 iMin;
720     i64 iMax;
721 
722     /* This block advances the phrase iterators until they point to a set of
723     ** entries that together comprise a match.  */
724     iMax = a[0].reader.iPos;
725     do {
726       bMatch = 1;
727       for(i=0; i<pNear->nPhrase; i++){
728         Fts5LookaheadReader *pPos = &a[i].reader;
729         iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
730         if( pPos->iPos<iMin || pPos->iPos>iMax ){
731           bMatch = 0;
732           while( pPos->iPos<iMin ){
733             if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
734           }
735           if( pPos->iPos>iMax ) iMax = pPos->iPos;
736         }
737       }
738     }while( bMatch==0 );
739 
740     /* Add an entry to each output position list */
741     for(i=0; i<pNear->nPhrase; i++){
742       i64 iPos = a[i].reader.iPos;
743       Fts5PoslistWriter *pWriter = &a[i].writer;
744       if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
745         sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
746       }
747     }
748 
749     iAdv = 0;
750     iMin = a[0].reader.iLookahead;
751     for(i=0; i<pNear->nPhrase; i++){
752       if( a[i].reader.iLookahead < iMin ){
753         iMin = a[i].reader.iLookahead;
754         iAdv = i;
755       }
756     }
757     if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
758   }
759 
760   ismatch_out: {
761     int bRet = a[0].pOut->n>0;
762     *pRc = rc;
763     if( a!=aStatic ) sqlite3_free(a);
764     return bRet;
765   }
766 }
767 
768 /*
769 ** Advance iterator pIter until it points to a value equal to or laster
770 ** than the initial value of *piLast. If this means the iterator points
771 ** to a value laster than *piLast, update *piLast to the new lastest value.
772 **
773 ** If the iterator reaches EOF, set *pbEof to true before returning. If
774 ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
775 ** are set, return a non-zero value. Otherwise, return zero.
776 */
fts5ExprAdvanceto(Fts5IndexIter * pIter,int bDesc,i64 * piLast,int * pRc,int * pbEof)777 static int fts5ExprAdvanceto(
778   Fts5IndexIter *pIter,           /* Iterator to advance */
779   int bDesc,                      /* True if iterator is "rowid DESC" */
780   i64 *piLast,                    /* IN/OUT: Lastest rowid seen so far */
781   int *pRc,                       /* OUT: Error code */
782   int *pbEof                      /* OUT: Set to true if EOF */
783 ){
784   i64 iLast = *piLast;
785   i64 iRowid;
786 
787   iRowid = pIter->iRowid;
788   if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
789     int rc = sqlite3Fts5IterNextFrom(pIter, iLast);
790     if( rc || sqlite3Fts5IterEof(pIter) ){
791       *pRc = rc;
792       *pbEof = 1;
793       return 1;
794     }
795     iRowid = pIter->iRowid;
796     assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
797   }
798   *piLast = iRowid;
799 
800   return 0;
801 }
802 
fts5ExprSynonymAdvanceto(Fts5ExprTerm * pTerm,int bDesc,i64 * piLast,int * pRc)803 static int fts5ExprSynonymAdvanceto(
804   Fts5ExprTerm *pTerm,            /* Term iterator to advance */
805   int bDesc,                      /* True if iterator is "rowid DESC" */
806   i64 *piLast,                    /* IN/OUT: Lastest rowid seen so far */
807   int *pRc                        /* OUT: Error code */
808 ){
809   int rc = SQLITE_OK;
810   i64 iLast = *piLast;
811   Fts5ExprTerm *p;
812   int bEof = 0;
813 
814   for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
815     if( sqlite3Fts5IterEof(p->pIter)==0 ){
816       i64 iRowid = p->pIter->iRowid;
817       if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
818         rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
819       }
820     }
821   }
822 
823   if( rc!=SQLITE_OK ){
824     *pRc = rc;
825     bEof = 1;
826   }else{
827     *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
828   }
829   return bEof;
830 }
831 
832 
fts5ExprNearTest(int * pRc,Fts5Expr * pExpr,Fts5ExprNode * pNode)833 static int fts5ExprNearTest(
834   int *pRc,
835   Fts5Expr *pExpr,                /* Expression that pNear is a part of */
836   Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
837 ){
838   Fts5ExprNearset *pNear = pNode->pNear;
839   int rc = *pRc;
840 
841   if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
842     Fts5ExprTerm *pTerm;
843     Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
844     pPhrase->poslist.n = 0;
845     for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
846       Fts5IndexIter *pIter = pTerm->pIter;
847       if( sqlite3Fts5IterEof(pIter)==0 ){
848         if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){
849           pPhrase->poslist.n = 1;
850         }
851       }
852     }
853     return pPhrase->poslist.n;
854   }else{
855     int i;
856 
857     /* Check that each phrase in the nearset matches the current row.
858     ** Populate the pPhrase->poslist buffers at the same time. If any
859     ** phrase is not a match, break out of the loop early.  */
860     for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
861       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
862       if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym
863        || pNear->pColset || pPhrase->aTerm[0].bFirst
864       ){
865         int bMatch = 0;
866         rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch);
867         if( bMatch==0 ) break;
868       }else{
869         Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
870         fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
871       }
872     }
873 
874     *pRc = rc;
875     if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
876       return 1;
877     }
878     return 0;
879   }
880 }
881 
882 
883 /*
884 ** Initialize all term iterators in the pNear object. If any term is found
885 ** to match no documents at all, return immediately without initializing any
886 ** further iterators.
887 **
888 ** If an error occurs, return an SQLite error code. Otherwise, return
889 ** SQLITE_OK. It is not considered an error if some term matches zero
890 ** documents.
891 */
fts5ExprNearInitAll(Fts5Expr * pExpr,Fts5ExprNode * pNode)892 static int fts5ExprNearInitAll(
893   Fts5Expr *pExpr,
894   Fts5ExprNode *pNode
895 ){
896   Fts5ExprNearset *pNear = pNode->pNear;
897   int i;
898 
899   assert( pNode->bNomatch==0 );
900   for(i=0; i<pNear->nPhrase; i++){
901     Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
902     if( pPhrase->nTerm==0 ){
903       pNode->bEof = 1;
904       return SQLITE_OK;
905     }else{
906       int j;
907       for(j=0; j<pPhrase->nTerm; j++){
908         Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
909         Fts5ExprTerm *p;
910         int bHit = 0;
911 
912         for(p=pTerm; p; p=p->pSynonym){
913           int rc;
914           if( p->pIter ){
915             sqlite3Fts5IterClose(p->pIter);
916             p->pIter = 0;
917           }
918           rc = sqlite3Fts5IndexQuery(
919               pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
920               (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
921               (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
922               pNear->pColset,
923               &p->pIter
924           );
925           assert( (rc==SQLITE_OK)==(p->pIter!=0) );
926           if( rc!=SQLITE_OK ) return rc;
927           if( 0==sqlite3Fts5IterEof(p->pIter) ){
928             bHit = 1;
929           }
930         }
931 
932         if( bHit==0 ){
933           pNode->bEof = 1;
934           return SQLITE_OK;
935         }
936       }
937     }
938   }
939 
940   pNode->bEof = 0;
941   return SQLITE_OK;
942 }
943 
944 /*
945 ** If pExpr is an ASC iterator, this function returns a value with the
946 ** same sign as:
947 **
948 **   (iLhs - iRhs)
949 **
950 ** Otherwise, if this is a DESC iterator, the opposite is returned:
951 **
952 **   (iRhs - iLhs)
953 */
fts5RowidCmp(Fts5Expr * pExpr,i64 iLhs,i64 iRhs)954 static int fts5RowidCmp(
955   Fts5Expr *pExpr,
956   i64 iLhs,
957   i64 iRhs
958 ){
959   assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
960   if( pExpr->bDesc==0 ){
961     if( iLhs<iRhs ) return -1;
962     return (iLhs > iRhs);
963   }else{
964     if( iLhs>iRhs ) return -1;
965     return (iLhs < iRhs);
966   }
967 }
968 
fts5ExprSetEof(Fts5ExprNode * pNode)969 static void fts5ExprSetEof(Fts5ExprNode *pNode){
970   int i;
971   pNode->bEof = 1;
972   pNode->bNomatch = 0;
973   for(i=0; i<pNode->nChild; i++){
974     fts5ExprSetEof(pNode->apChild[i]);
975   }
976 }
977 
fts5ExprNodeZeroPoslist(Fts5ExprNode * pNode)978 static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
979   if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
980     Fts5ExprNearset *pNear = pNode->pNear;
981     int i;
982     for(i=0; i<pNear->nPhrase; i++){
983       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
984       pPhrase->poslist.n = 0;
985     }
986   }else{
987     int i;
988     for(i=0; i<pNode->nChild; i++){
989       fts5ExprNodeZeroPoslist(pNode->apChild[i]);
990     }
991   }
992 }
993 
994 
995 
996 /*
997 ** Compare the values currently indicated by the two nodes as follows:
998 **
999 **    res = (*p1) - (*p2)
1000 **
1001 ** Nodes that point to values that come later in the iteration order are
1002 ** considered to be larger. Nodes at EOF are the largest of all.
1003 **
1004 ** This means that if the iteration order is ASC, then numerically larger
1005 ** rowids are considered larger. Or if it is the default DESC, numerically
1006 ** smaller rowids are larger.
1007 */
fts5NodeCompare(Fts5Expr * pExpr,Fts5ExprNode * p1,Fts5ExprNode * p2)1008 static int fts5NodeCompare(
1009   Fts5Expr *pExpr,
1010   Fts5ExprNode *p1,
1011   Fts5ExprNode *p2
1012 ){
1013   if( p2->bEof ) return -1;
1014   if( p1->bEof ) return +1;
1015   return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
1016 }
1017 
1018 /*
1019 ** All individual term iterators in pNear are guaranteed to be valid when
1020 ** this function is called. This function checks if all term iterators
1021 ** point to the same rowid, and if not, advances them until they do.
1022 ** If an EOF is reached before this happens, *pbEof is set to true before
1023 ** returning.
1024 **
1025 ** SQLITE_OK is returned if an error occurs, or an SQLite error code
1026 ** otherwise. It is not considered an error code if an iterator reaches
1027 ** EOF.
1028 */
fts5ExprNodeTest_STRING(Fts5Expr * pExpr,Fts5ExprNode * pNode)1029 static int fts5ExprNodeTest_STRING(
1030   Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
1031   Fts5ExprNode *pNode
1032 ){
1033   Fts5ExprNearset *pNear = pNode->pNear;
1034   Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
1035   int rc = SQLITE_OK;
1036   i64 iLast;                      /* Lastest rowid any iterator points to */
1037   int i, j;                       /* Phrase and token index, respectively */
1038   int bMatch;                     /* True if all terms are at the same rowid */
1039   const int bDesc = pExpr->bDesc;
1040 
1041   /* Check that this node should not be FTS5_TERM */
1042   assert( pNear->nPhrase>1
1043        || pNear->apPhrase[0]->nTerm>1
1044        || pNear->apPhrase[0]->aTerm[0].pSynonym
1045        || pNear->apPhrase[0]->aTerm[0].bFirst
1046   );
1047 
1048   /* Initialize iLast, the "lastest" rowid any iterator points to. If the
1049   ** iterator skips through rowids in the default ascending order, this means
1050   ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
1051   ** means the minimum rowid.  */
1052   if( pLeft->aTerm[0].pSynonym ){
1053     iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
1054   }else{
1055     iLast = pLeft->aTerm[0].pIter->iRowid;
1056   }
1057 
1058   do {
1059     bMatch = 1;
1060     for(i=0; i<pNear->nPhrase; i++){
1061       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
1062       for(j=0; j<pPhrase->nTerm; j++){
1063         Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
1064         if( pTerm->pSynonym ){
1065           i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
1066           if( iRowid==iLast ) continue;
1067           bMatch = 0;
1068           if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
1069             pNode->bNomatch = 0;
1070             pNode->bEof = 1;
1071             return rc;
1072           }
1073         }else{
1074           Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
1075           if( pIter->iRowid==iLast || pIter->bEof ) continue;
1076           bMatch = 0;
1077           if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
1078             return rc;
1079           }
1080         }
1081       }
1082     }
1083   }while( bMatch==0 );
1084 
1085   pNode->iRowid = iLast;
1086   pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK);
1087   assert( pNode->bEof==0 || pNode->bNomatch==0 );
1088 
1089   return rc;
1090 }
1091 
1092 /*
1093 ** Advance the first term iterator in the first phrase of pNear. Set output
1094 ** variable *pbEof to true if it reaches EOF or if an error occurs.
1095 **
1096 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1097 ** occurs.
1098 */
fts5ExprNodeNext_STRING(Fts5Expr * pExpr,Fts5ExprNode * pNode,int bFromValid,i64 iFrom)1099 static int fts5ExprNodeNext_STRING(
1100   Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
1101   Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
1102   int bFromValid,
1103   i64 iFrom
1104 ){
1105   Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
1106   int rc = SQLITE_OK;
1107 
1108   pNode->bNomatch = 0;
1109   if( pTerm->pSynonym ){
1110     int bEof = 1;
1111     Fts5ExprTerm *p;
1112 
1113     /* Find the firstest rowid any synonym points to. */
1114     i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);
1115 
1116     /* Advance each iterator that currently points to iRowid. Or, if iFrom
1117     ** is valid - each iterator that points to a rowid before iFrom.  */
1118     for(p=pTerm; p; p=p->pSynonym){
1119       if( sqlite3Fts5IterEof(p->pIter)==0 ){
1120         i64 ii = p->pIter->iRowid;
1121         if( ii==iRowid
1122          || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
1123         ){
1124           if( bFromValid ){
1125             rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
1126           }else{
1127             rc = sqlite3Fts5IterNext(p->pIter);
1128           }
1129           if( rc!=SQLITE_OK ) break;
1130           if( sqlite3Fts5IterEof(p->pIter)==0 ){
1131             bEof = 0;
1132           }
1133         }else{
1134           bEof = 0;
1135         }
1136       }
1137     }
1138 
1139     /* Set the EOF flag if either all synonym iterators are at EOF or an
1140     ** error has occurred.  */
1141     pNode->bEof = (rc || bEof);
1142   }else{
1143     Fts5IndexIter *pIter = pTerm->pIter;
1144 
1145     assert( Fts5NodeIsString(pNode) );
1146     if( bFromValid ){
1147       rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
1148     }else{
1149       rc = sqlite3Fts5IterNext(pIter);
1150     }
1151 
1152     pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
1153   }
1154 
1155   if( pNode->bEof==0 ){
1156     assert( rc==SQLITE_OK );
1157     rc = fts5ExprNodeTest_STRING(pExpr, pNode);
1158   }
1159 
1160   return rc;
1161 }
1162 
1163 
fts5ExprNodeTest_TERM(Fts5Expr * pExpr,Fts5ExprNode * pNode)1164 static int fts5ExprNodeTest_TERM(
1165   Fts5Expr *pExpr,                /* Expression that pNear is a part of */
1166   Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
1167 ){
1168   /* As this "NEAR" object is actually a single phrase that consists
1169   ** of a single term only, grab pointers into the poslist managed by the
1170   ** fts5_index.c iterator object. This is much faster than synthesizing
1171   ** a new poslist the way we have to for more complicated phrase or NEAR
1172   ** expressions.  */
1173   Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
1174   Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
1175 
1176   assert( pNode->eType==FTS5_TERM );
1177   assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 );
1178   assert( pPhrase->aTerm[0].pSynonym==0 );
1179 
1180   pPhrase->poslist.n = pIter->nData;
1181   if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){
1182     pPhrase->poslist.p = (u8*)pIter->pData;
1183   }
1184   pNode->iRowid = pIter->iRowid;
1185   pNode->bNomatch = (pPhrase->poslist.n==0);
1186   return SQLITE_OK;
1187 }
1188 
1189 /*
1190 ** xNext() method for a node of type FTS5_TERM.
1191 */
fts5ExprNodeNext_TERM(Fts5Expr * pExpr,Fts5ExprNode * pNode,int bFromValid,i64 iFrom)1192 static int fts5ExprNodeNext_TERM(
1193   Fts5Expr *pExpr,
1194   Fts5ExprNode *pNode,
1195   int bFromValid,
1196   i64 iFrom
1197 ){
1198   int rc;
1199   Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
1200 
1201   assert( pNode->bEof==0 );
1202   if( bFromValid ){
1203     rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
1204   }else{
1205     rc = sqlite3Fts5IterNext(pIter);
1206   }
1207   if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
1208     rc = fts5ExprNodeTest_TERM(pExpr, pNode);
1209   }else{
1210     pNode->bEof = 1;
1211     pNode->bNomatch = 0;
1212   }
1213   return rc;
1214 }
1215 
fts5ExprNodeTest_OR(Fts5Expr * pExpr,Fts5ExprNode * pNode)1216 static void fts5ExprNodeTest_OR(
1217   Fts5Expr *pExpr,                /* Expression of which pNode is a part */
1218   Fts5ExprNode *pNode             /* Expression node to test */
1219 ){
1220   Fts5ExprNode *pNext = pNode->apChild[0];
1221   int i;
1222 
1223   for(i=1; i<pNode->nChild; i++){
1224     Fts5ExprNode *pChild = pNode->apChild[i];
1225     int cmp = fts5NodeCompare(pExpr, pNext, pChild);
1226     if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){
1227       pNext = pChild;
1228     }
1229   }
1230   pNode->iRowid = pNext->iRowid;
1231   pNode->bEof = pNext->bEof;
1232   pNode->bNomatch = pNext->bNomatch;
1233 }
1234 
fts5ExprNodeNext_OR(Fts5Expr * pExpr,Fts5ExprNode * pNode,int bFromValid,i64 iFrom)1235 static int fts5ExprNodeNext_OR(
1236   Fts5Expr *pExpr,
1237   Fts5ExprNode *pNode,
1238   int bFromValid,
1239   i64 iFrom
1240 ){
1241   int i;
1242   i64 iLast = pNode->iRowid;
1243 
1244   for(i=0; i<pNode->nChild; i++){
1245     Fts5ExprNode *p1 = pNode->apChild[i];
1246     assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 );
1247     if( p1->bEof==0 ){
1248       if( (p1->iRowid==iLast)
1249        || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
1250       ){
1251         int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
1252         if( rc!=SQLITE_OK ){
1253           pNode->bNomatch = 0;
1254           return rc;
1255         }
1256       }
1257     }
1258   }
1259 
1260   fts5ExprNodeTest_OR(pExpr, pNode);
1261   return SQLITE_OK;
1262 }
1263 
1264 /*
1265 ** Argument pNode is an FTS5_AND node.
1266 */
fts5ExprNodeTest_AND(Fts5Expr * pExpr,Fts5ExprNode * pAnd)1267 static int fts5ExprNodeTest_AND(
1268   Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
1269   Fts5ExprNode *pAnd              /* FTS5_AND node to advance */
1270 ){
1271   int iChild;
1272   i64 iLast = pAnd->iRowid;
1273   int rc = SQLITE_OK;
1274   int bMatch;
1275 
1276   assert( pAnd->bEof==0 );
1277   do {
1278     pAnd->bNomatch = 0;
1279     bMatch = 1;
1280     for(iChild=0; iChild<pAnd->nChild; iChild++){
1281       Fts5ExprNode *pChild = pAnd->apChild[iChild];
1282       int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
1283       if( cmp>0 ){
1284         /* Advance pChild until it points to iLast or laster */
1285         rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast);
1286         if( rc!=SQLITE_OK ){
1287           pAnd->bNomatch = 0;
1288           return rc;
1289         }
1290       }
1291 
1292       /* If the child node is now at EOF, so is the parent AND node. Otherwise,
1293       ** the child node is guaranteed to have advanced at least as far as
1294       ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
1295       ** new lastest rowid seen so far.  */
1296       assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 );
1297       if( pChild->bEof ){
1298         fts5ExprSetEof(pAnd);
1299         bMatch = 1;
1300         break;
1301       }else if( iLast!=pChild->iRowid ){
1302         bMatch = 0;
1303         iLast = pChild->iRowid;
1304       }
1305 
1306       if( pChild->bNomatch ){
1307         pAnd->bNomatch = 1;
1308       }
1309     }
1310   }while( bMatch==0 );
1311 
1312   if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
1313     fts5ExprNodeZeroPoslist(pAnd);
1314   }
1315   pAnd->iRowid = iLast;
1316   return SQLITE_OK;
1317 }
1318 
fts5ExprNodeNext_AND(Fts5Expr * pExpr,Fts5ExprNode * pNode,int bFromValid,i64 iFrom)1319 static int fts5ExprNodeNext_AND(
1320   Fts5Expr *pExpr,
1321   Fts5ExprNode *pNode,
1322   int bFromValid,
1323   i64 iFrom
1324 ){
1325   int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
1326   if( rc==SQLITE_OK ){
1327     rc = fts5ExprNodeTest_AND(pExpr, pNode);
1328   }else{
1329     pNode->bNomatch = 0;
1330   }
1331   return rc;
1332 }
1333 
fts5ExprNodeTest_NOT(Fts5Expr * pExpr,Fts5ExprNode * pNode)1334 static int fts5ExprNodeTest_NOT(
1335   Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
1336   Fts5ExprNode *pNode             /* FTS5_NOT node to advance */
1337 ){
1338   int rc = SQLITE_OK;
1339   Fts5ExprNode *p1 = pNode->apChild[0];
1340   Fts5ExprNode *p2 = pNode->apChild[1];
1341   assert( pNode->nChild==2 );
1342 
1343   while( rc==SQLITE_OK && p1->bEof==0 ){
1344     int cmp = fts5NodeCompare(pExpr, p1, p2);
1345     if( cmp>0 ){
1346       rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid);
1347       cmp = fts5NodeCompare(pExpr, p1, p2);
1348     }
1349     assert( rc!=SQLITE_OK || cmp<=0 );
1350     if( cmp || p2->bNomatch ) break;
1351     rc = fts5ExprNodeNext(pExpr, p1, 0, 0);
1352   }
1353   pNode->bEof = p1->bEof;
1354   pNode->bNomatch = p1->bNomatch;
1355   pNode->iRowid = p1->iRowid;
1356   if( p1->bEof ){
1357     fts5ExprNodeZeroPoslist(p2);
1358   }
1359   return rc;
1360 }
1361 
fts5ExprNodeNext_NOT(Fts5Expr * pExpr,Fts5ExprNode * pNode,int bFromValid,i64 iFrom)1362 static int fts5ExprNodeNext_NOT(
1363   Fts5Expr *pExpr,
1364   Fts5ExprNode *pNode,
1365   int bFromValid,
1366   i64 iFrom
1367 ){
1368   int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
1369   if( rc==SQLITE_OK ){
1370     rc = fts5ExprNodeTest_NOT(pExpr, pNode);
1371   }
1372   if( rc!=SQLITE_OK ){
1373     pNode->bNomatch = 0;
1374   }
1375   return rc;
1376 }
1377 
1378 /*
1379 ** If pNode currently points to a match, this function returns SQLITE_OK
1380 ** without modifying it. Otherwise, pNode is advanced until it does point
1381 ** to a match or EOF is reached.
1382 */
fts5ExprNodeTest(Fts5Expr * pExpr,Fts5ExprNode * pNode)1383 static int fts5ExprNodeTest(
1384   Fts5Expr *pExpr,                /* Expression of which pNode is a part */
1385   Fts5ExprNode *pNode             /* Expression node to test */
1386 ){
1387   int rc = SQLITE_OK;
1388   if( pNode->bEof==0 ){
1389     switch( pNode->eType ){
1390 
1391       case FTS5_STRING: {
1392         rc = fts5ExprNodeTest_STRING(pExpr, pNode);
1393         break;
1394       }
1395 
1396       case FTS5_TERM: {
1397         rc = fts5ExprNodeTest_TERM(pExpr, pNode);
1398         break;
1399       }
1400 
1401       case FTS5_AND: {
1402         rc = fts5ExprNodeTest_AND(pExpr, pNode);
1403         break;
1404       }
1405 
1406       case FTS5_OR: {
1407         fts5ExprNodeTest_OR(pExpr, pNode);
1408         break;
1409       }
1410 
1411       default: assert( pNode->eType==FTS5_NOT ); {
1412         rc = fts5ExprNodeTest_NOT(pExpr, pNode);
1413         break;
1414       }
1415     }
1416   }
1417   return rc;
1418 }
1419 
1420 
1421 /*
1422 ** Set node pNode, which is part of expression pExpr, to point to the first
1423 ** match. If there are no matches, set the Node.bEof flag to indicate EOF.
1424 **
1425 ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
1426 ** It is not an error if there are no matches.
1427 */
fts5ExprNodeFirst(Fts5Expr * pExpr,Fts5ExprNode * pNode)1428 static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
1429   int rc = SQLITE_OK;
1430   pNode->bEof = 0;
1431   pNode->bNomatch = 0;
1432 
1433   if( Fts5NodeIsString(pNode) ){
1434     /* Initialize all term iterators in the NEAR object. */
1435     rc = fts5ExprNearInitAll(pExpr, pNode);
1436   }else if( pNode->xNext==0 ){
1437     pNode->bEof = 1;
1438   }else{
1439     int i;
1440     int nEof = 0;
1441     for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
1442       Fts5ExprNode *pChild = pNode->apChild[i];
1443       rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
1444       assert( pChild->bEof==0 || pChild->bEof==1 );
1445       nEof += pChild->bEof;
1446     }
1447     pNode->iRowid = pNode->apChild[0]->iRowid;
1448 
1449     switch( pNode->eType ){
1450       case FTS5_AND:
1451         if( nEof>0 ) fts5ExprSetEof(pNode);
1452         break;
1453 
1454       case FTS5_OR:
1455         if( pNode->nChild==nEof ) fts5ExprSetEof(pNode);
1456         break;
1457 
1458       default:
1459         assert( pNode->eType==FTS5_NOT );
1460         pNode->bEof = pNode->apChild[0]->bEof;
1461         break;
1462     }
1463   }
1464 
1465   if( rc==SQLITE_OK ){
1466     rc = fts5ExprNodeTest(pExpr, pNode);
1467   }
1468   return rc;
1469 }
1470 
1471 
1472 /*
1473 ** Begin iterating through the set of documents in index pIdx matched by
1474 ** the MATCH expression passed as the first argument. If the "bDesc"
1475 ** parameter is passed a non-zero value, iteration is in descending rowid
1476 ** order. Or, if it is zero, in ascending order.
1477 **
1478 ** If iterating in ascending rowid order (bDesc==0), the first document
1479 ** visited is that with the smallest rowid that is larger than or equal
1480 ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1),
1481 ** then the first document visited must have a rowid smaller than or
1482 ** equal to iFirst.
1483 **
1484 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
1485 ** is not considered an error if the query does not match any documents.
1486 */
sqlite3Fts5ExprFirst(Fts5Expr * p,Fts5Index * pIdx,i64 iFirst,int bDesc)1487 int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
1488   Fts5ExprNode *pRoot = p->pRoot;
1489   int rc;                         /* Return code */
1490 
1491   p->pIndex = pIdx;
1492   p->bDesc = bDesc;
1493   rc = fts5ExprNodeFirst(p, pRoot);
1494 
1495   /* If not at EOF but the current rowid occurs earlier than iFirst in
1496   ** the iteration order, move to document iFirst or later. */
1497   if( rc==SQLITE_OK
1498    && 0==pRoot->bEof
1499    && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0
1500   ){
1501     rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
1502   }
1503 
1504   /* If the iterator is not at a real match, skip forward until it is. */
1505   while( pRoot->bNomatch && rc==SQLITE_OK ){
1506     assert( pRoot->bEof==0 );
1507     rc = fts5ExprNodeNext(p, pRoot, 0, 0);
1508   }
1509   return rc;
1510 }
1511 
1512 /*
1513 ** Move to the next document
1514 **
1515 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
1516 ** is not considered an error if the query does not match any documents.
1517 */
sqlite3Fts5ExprNext(Fts5Expr * p,i64 iLast)1518 int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
1519   int rc;
1520   Fts5ExprNode *pRoot = p->pRoot;
1521   assert( pRoot->bEof==0 && pRoot->bNomatch==0 );
1522   do {
1523     rc = fts5ExprNodeNext(p, pRoot, 0, 0);
1524     assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) );
1525   }while( pRoot->bNomatch );
1526   if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
1527     pRoot->bEof = 1;
1528   }
1529   return rc;
1530 }
1531 
sqlite3Fts5ExprEof(Fts5Expr * p)1532 int sqlite3Fts5ExprEof(Fts5Expr *p){
1533   return p->pRoot->bEof;
1534 }
1535 
sqlite3Fts5ExprRowid(Fts5Expr * p)1536 i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
1537   return p->pRoot->iRowid;
1538 }
1539 
fts5ParseStringFromToken(Fts5Token * pToken,char ** pz)1540 static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
1541   int rc = SQLITE_OK;
1542   *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
1543   return rc;
1544 }
1545 
1546 /*
1547 ** Free the phrase object passed as the only argument.
1548 */
fts5ExprPhraseFree(Fts5ExprPhrase * pPhrase)1549 static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
1550   if( pPhrase ){
1551     int i;
1552     for(i=0; i<pPhrase->nTerm; i++){
1553       Fts5ExprTerm *pSyn;
1554       Fts5ExprTerm *pNext;
1555       Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
1556       sqlite3_free(pTerm->zTerm);
1557       sqlite3Fts5IterClose(pTerm->pIter);
1558       for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
1559         pNext = pSyn->pSynonym;
1560         sqlite3Fts5IterClose(pSyn->pIter);
1561         fts5BufferFree((Fts5Buffer*)&pSyn[1]);
1562         sqlite3_free(pSyn);
1563       }
1564     }
1565     if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
1566     sqlite3_free(pPhrase);
1567   }
1568 }
1569 
1570 /*
1571 ** Set the "bFirst" flag on the first token of the phrase passed as the
1572 ** only argument.
1573 */
sqlite3Fts5ParseSetCaret(Fts5ExprPhrase * pPhrase)1574 void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){
1575   if( pPhrase && pPhrase->nTerm ){
1576     pPhrase->aTerm[0].bFirst = 1;
1577   }
1578 }
1579 
1580 /*
1581 ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
1582 ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
1583 ** appended to it and the results returned.
1584 **
1585 ** If an OOM error occurs, both the pNear and pPhrase objects are freed and
1586 ** NULL returned.
1587 */
sqlite3Fts5ParseNearset(Fts5Parse * pParse,Fts5ExprNearset * pNear,Fts5ExprPhrase * pPhrase)1588 Fts5ExprNearset *sqlite3Fts5ParseNearset(
1589   Fts5Parse *pParse,              /* Parse context */
1590   Fts5ExprNearset *pNear,         /* Existing nearset, or NULL */
1591   Fts5ExprPhrase *pPhrase         /* Recently parsed phrase */
1592 ){
1593   const int SZALLOC = 8;
1594   Fts5ExprNearset *pRet = 0;
1595 
1596   if( pParse->rc==SQLITE_OK ){
1597     if( pPhrase==0 ){
1598       return pNear;
1599     }
1600     if( pNear==0 ){
1601       sqlite3_int64 nByte;
1602       nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
1603       pRet = sqlite3_malloc64(nByte);
1604       if( pRet==0 ){
1605         pParse->rc = SQLITE_NOMEM;
1606       }else{
1607         memset(pRet, 0, (size_t)nByte);
1608       }
1609     }else if( (pNear->nPhrase % SZALLOC)==0 ){
1610       int nNew = pNear->nPhrase + SZALLOC;
1611       sqlite3_int64 nByte;
1612 
1613       nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
1614       pRet = (Fts5ExprNearset*)sqlite3_realloc64(pNear, nByte);
1615       if( pRet==0 ){
1616         pParse->rc = SQLITE_NOMEM;
1617       }
1618     }else{
1619       pRet = pNear;
1620     }
1621   }
1622 
1623   if( pRet==0 ){
1624     assert( pParse->rc!=SQLITE_OK );
1625     sqlite3Fts5ParseNearsetFree(pNear);
1626     sqlite3Fts5ParsePhraseFree(pPhrase);
1627   }else{
1628     if( pRet->nPhrase>0 ){
1629       Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1];
1630       assert( pParse!=0 );
1631       assert( pParse->apPhrase!=0 );
1632       assert( pParse->nPhrase>=2 );
1633       assert( pLast==pParse->apPhrase[pParse->nPhrase-2] );
1634       if( pPhrase->nTerm==0 ){
1635         fts5ExprPhraseFree(pPhrase);
1636         pRet->nPhrase--;
1637         pParse->nPhrase--;
1638         pPhrase = pLast;
1639       }else if( pLast->nTerm==0 ){
1640         fts5ExprPhraseFree(pLast);
1641         pParse->apPhrase[pParse->nPhrase-2] = pPhrase;
1642         pParse->nPhrase--;
1643         pRet->nPhrase--;
1644       }
1645     }
1646     pRet->apPhrase[pRet->nPhrase++] = pPhrase;
1647   }
1648   return pRet;
1649 }
1650 
1651 typedef struct TokenCtx TokenCtx;
1652 struct TokenCtx {
1653   Fts5ExprPhrase *pPhrase;
1654   int rc;
1655 };
1656 
1657 /*
1658 ** Callback for tokenizing terms used by ParseTerm().
1659 */
fts5ParseTokenize(void * pContext,int tflags,const char * pToken,int nToken,int iUnused1,int iUnused2)1660 static int fts5ParseTokenize(
1661   void *pContext,                 /* Pointer to Fts5InsertCtx object */
1662   int tflags,                     /* Mask of FTS5_TOKEN_* flags */
1663   const char *pToken,             /* Buffer containing token */
1664   int nToken,                     /* Size of token in bytes */
1665   int iUnused1,                   /* Start offset of token */
1666   int iUnused2                    /* End offset of token */
1667 ){
1668   int rc = SQLITE_OK;
1669   const int SZALLOC = 8;
1670   TokenCtx *pCtx = (TokenCtx*)pContext;
1671   Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
1672 
1673   UNUSED_PARAM2(iUnused1, iUnused2);
1674 
1675   /* If an error has already occurred, this is a no-op */
1676   if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
1677   if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
1678 
1679   if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){
1680     Fts5ExprTerm *pSyn;
1681     sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
1682     pSyn = (Fts5ExprTerm*)sqlite3_malloc64(nByte);
1683     if( pSyn==0 ){
1684       rc = SQLITE_NOMEM;
1685     }else{
1686       memset(pSyn, 0, (size_t)nByte);
1687       pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
1688       memcpy(pSyn->zTerm, pToken, nToken);
1689       pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
1690       pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
1691     }
1692   }else{
1693     Fts5ExprTerm *pTerm;
1694     if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
1695       Fts5ExprPhrase *pNew;
1696       int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);
1697 
1698       pNew = (Fts5ExprPhrase*)sqlite3_realloc64(pPhrase,
1699           sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
1700       );
1701       if( pNew==0 ){
1702         rc = SQLITE_NOMEM;
1703       }else{
1704         if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
1705         pCtx->pPhrase = pPhrase = pNew;
1706         pNew->nTerm = nNew - SZALLOC;
1707       }
1708     }
1709 
1710     if( rc==SQLITE_OK ){
1711       pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
1712       memset(pTerm, 0, sizeof(Fts5ExprTerm));
1713       pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
1714     }
1715   }
1716 
1717   pCtx->rc = rc;
1718   return rc;
1719 }
1720 
1721 
1722 /*
1723 ** Free the phrase object passed as the only argument.
1724 */
sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase * pPhrase)1725 void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
1726   fts5ExprPhraseFree(pPhrase);
1727 }
1728 
1729 /*
1730 ** Free the phrase object passed as the second argument.
1731 */
sqlite3Fts5ParseNearsetFree(Fts5ExprNearset * pNear)1732 void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
1733   if( pNear ){
1734     int i;
1735     for(i=0; i<pNear->nPhrase; i++){
1736       fts5ExprPhraseFree(pNear->apPhrase[i]);
1737     }
1738     sqlite3_free(pNear->pColset);
1739     sqlite3_free(pNear);
1740   }
1741 }
1742 
sqlite3Fts5ParseFinished(Fts5Parse * pParse,Fts5ExprNode * p)1743 void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
1744   assert( pParse->pExpr==0 );
1745   pParse->pExpr = p;
1746 }
1747 
parseGrowPhraseArray(Fts5Parse * pParse)1748 static int parseGrowPhraseArray(Fts5Parse *pParse){
1749   if( (pParse->nPhrase % 8)==0 ){
1750     sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
1751     Fts5ExprPhrase **apNew;
1752     apNew = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nByte);
1753     if( apNew==0 ){
1754       pParse->rc = SQLITE_NOMEM;
1755       return SQLITE_NOMEM;
1756     }
1757     pParse->apPhrase = apNew;
1758   }
1759   return SQLITE_OK;
1760 }
1761 
1762 /*
1763 ** This function is called by the parser to process a string token. The
1764 ** string may or may not be quoted. In any case it is tokenized and a
1765 ** phrase object consisting of all tokens returned.
1766 */
sqlite3Fts5ParseTerm(Fts5Parse * pParse,Fts5ExprPhrase * pAppend,Fts5Token * pToken,int bPrefix)1767 Fts5ExprPhrase *sqlite3Fts5ParseTerm(
1768   Fts5Parse *pParse,              /* Parse context */
1769   Fts5ExprPhrase *pAppend,        /* Phrase to append to */
1770   Fts5Token *pToken,              /* String to tokenize */
1771   int bPrefix                     /* True if there is a trailing "*" */
1772 ){
1773   Fts5Config *pConfig = pParse->pConfig;
1774   TokenCtx sCtx;                  /* Context object passed to callback */
1775   int rc;                         /* Tokenize return code */
1776   char *z = 0;
1777 
1778   memset(&sCtx, 0, sizeof(TokenCtx));
1779   sCtx.pPhrase = pAppend;
1780 
1781   rc = fts5ParseStringFromToken(pToken, &z);
1782   if( rc==SQLITE_OK ){
1783     int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
1784     int n;
1785     sqlite3Fts5Dequote(z);
1786     n = (int)strlen(z);
1787     rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
1788   }
1789   sqlite3_free(z);
1790   if( rc || (rc = sCtx.rc) ){
1791     pParse->rc = rc;
1792     fts5ExprPhraseFree(sCtx.pPhrase);
1793     sCtx.pPhrase = 0;
1794   }else{
1795 
1796     if( pAppend==0 ){
1797       if( parseGrowPhraseArray(pParse) ){
1798         fts5ExprPhraseFree(sCtx.pPhrase);
1799         return 0;
1800       }
1801       pParse->nPhrase++;
1802     }
1803 
1804     if( sCtx.pPhrase==0 ){
1805       /* This happens when parsing a token or quoted phrase that contains
1806       ** no token characters at all. (e.g ... MATCH '""'). */
1807       sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
1808     }else if( sCtx.pPhrase->nTerm ){
1809       sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix;
1810     }
1811     pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
1812   }
1813 
1814   return sCtx.pPhrase;
1815 }
1816 
1817 /*
1818 ** Create a new FTS5 expression by cloning phrase iPhrase of the
1819 ** expression passed as the second argument.
1820 */
sqlite3Fts5ExprClonePhrase(Fts5Expr * pExpr,int iPhrase,Fts5Expr ** ppNew)1821 int sqlite3Fts5ExprClonePhrase(
1822   Fts5Expr *pExpr,
1823   int iPhrase,
1824   Fts5Expr **ppNew
1825 ){
1826   int rc = SQLITE_OK;             /* Return code */
1827   Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
1828   Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
1829   TokenCtx sCtx = {0,0};          /* Context object for fts5ParseTokenize */
1830 
1831   pOrig = pExpr->apExprPhrase[iPhrase];
1832   pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
1833   if( rc==SQLITE_OK ){
1834     pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
1835         sizeof(Fts5ExprPhrase*));
1836   }
1837   if( rc==SQLITE_OK ){
1838     pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
1839         sizeof(Fts5ExprNode));
1840   }
1841   if( rc==SQLITE_OK ){
1842     pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
1843         sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
1844   }
1845   if( rc==SQLITE_OK ){
1846     Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset;
1847     if( pColsetOrig ){
1848       sqlite3_int64 nByte;
1849       Fts5Colset *pColset;
1850       nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int);
1851       pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte);
1852       if( pColset ){
1853         memcpy(pColset, pColsetOrig, (size_t)nByte);
1854       }
1855       pNew->pRoot->pNear->pColset = pColset;
1856     }
1857   }
1858 
1859   if( pOrig->nTerm ){
1860     int i;                          /* Used to iterate through phrase terms */
1861     for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
1862       int tflags = 0;
1863       Fts5ExprTerm *p;
1864       for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
1865         const char *zTerm = p->zTerm;
1866         rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
1867             0, 0);
1868         tflags = FTS5_TOKEN_COLOCATED;
1869       }
1870       if( rc==SQLITE_OK ){
1871         sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
1872         sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst;
1873       }
1874     }
1875   }else{
1876     /* This happens when parsing a token or quoted phrase that contains
1877     ** no token characters at all. (e.g ... MATCH '""'). */
1878     sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase));
1879   }
1880 
1881   if( rc==SQLITE_OK && ALWAYS(sCtx.pPhrase) ){
1882     /* All the allocations succeeded. Put the expression object together. */
1883     pNew->pIndex = pExpr->pIndex;
1884     pNew->pConfig = pExpr->pConfig;
1885     pNew->nPhrase = 1;
1886     pNew->apExprPhrase[0] = sCtx.pPhrase;
1887     pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
1888     pNew->pRoot->pNear->nPhrase = 1;
1889     sCtx.pPhrase->pNode = pNew->pRoot;
1890 
1891     if( pOrig->nTerm==1
1892      && pOrig->aTerm[0].pSynonym==0
1893      && pOrig->aTerm[0].bFirst==0
1894     ){
1895       pNew->pRoot->eType = FTS5_TERM;
1896       pNew->pRoot->xNext = fts5ExprNodeNext_TERM;
1897     }else{
1898       pNew->pRoot->eType = FTS5_STRING;
1899       pNew->pRoot->xNext = fts5ExprNodeNext_STRING;
1900     }
1901   }else{
1902     sqlite3Fts5ExprFree(pNew);
1903     fts5ExprPhraseFree(sCtx.pPhrase);
1904     pNew = 0;
1905   }
1906 
1907   *ppNew = pNew;
1908   return rc;
1909 }
1910 
1911 
1912 /*
1913 ** Token pTok has appeared in a MATCH expression where the NEAR operator
1914 ** is expected. If token pTok does not contain "NEAR", store an error
1915 ** in the pParse object.
1916 */
sqlite3Fts5ParseNear(Fts5Parse * pParse,Fts5Token * pTok)1917 void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
1918   if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
1919     sqlite3Fts5ParseError(
1920         pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
1921     );
1922   }
1923 }
1924 
sqlite3Fts5ParseSetDistance(Fts5Parse * pParse,Fts5ExprNearset * pNear,Fts5Token * p)1925 void sqlite3Fts5ParseSetDistance(
1926   Fts5Parse *pParse,
1927   Fts5ExprNearset *pNear,
1928   Fts5Token *p
1929 ){
1930   if( pNear ){
1931     int nNear = 0;
1932     int i;
1933     if( p->n ){
1934       for(i=0; i<p->n; i++){
1935         char c = (char)p->p[i];
1936         if( c<'0' || c>'9' ){
1937           sqlite3Fts5ParseError(
1938               pParse, "expected integer, got \"%.*s\"", p->n, p->p
1939               );
1940           return;
1941         }
1942         nNear = nNear * 10 + (p->p[i] - '0');
1943       }
1944     }else{
1945       nNear = FTS5_DEFAULT_NEARDIST;
1946     }
1947     pNear->nNear = nNear;
1948   }
1949 }
1950 
1951 /*
1952 ** The second argument passed to this function may be NULL, or it may be
1953 ** an existing Fts5Colset object. This function returns a pointer to
1954 ** a new colset object containing the contents of (p) with new value column
1955 ** number iCol appended.
1956 **
1957 ** If an OOM error occurs, store an error code in pParse and return NULL.
1958 ** The old colset object (if any) is not freed in this case.
1959 */
fts5ParseColset(Fts5Parse * pParse,Fts5Colset * p,int iCol)1960 static Fts5Colset *fts5ParseColset(
1961   Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
1962   Fts5Colset *p,                  /* Existing colset object */
1963   int iCol                        /* New column to add to colset object */
1964 ){
1965   int nCol = p ? p->nCol : 0;     /* Num. columns already in colset object */
1966   Fts5Colset *pNew;               /* New colset object to return */
1967 
1968   assert( pParse->rc==SQLITE_OK );
1969   assert( iCol>=0 && iCol<pParse->pConfig->nCol );
1970 
1971   pNew = sqlite3_realloc64(p, sizeof(Fts5Colset) + sizeof(int)*nCol);
1972   if( pNew==0 ){
1973     pParse->rc = SQLITE_NOMEM;
1974   }else{
1975     int *aiCol = pNew->aiCol;
1976     int i, j;
1977     for(i=0; i<nCol; i++){
1978       if( aiCol[i]==iCol ) return pNew;
1979       if( aiCol[i]>iCol ) break;
1980     }
1981     for(j=nCol; j>i; j--){
1982       aiCol[j] = aiCol[j-1];
1983     }
1984     aiCol[i] = iCol;
1985     pNew->nCol = nCol+1;
1986 
1987 #ifndef NDEBUG
1988     /* Check that the array is in order and contains no duplicate entries. */
1989     for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] );
1990 #endif
1991   }
1992 
1993   return pNew;
1994 }
1995 
1996 /*
1997 ** Allocate and return an Fts5Colset object specifying the inverse of
1998 ** the colset passed as the second argument. Free the colset passed
1999 ** as the second argument before returning.
2000 */
sqlite3Fts5ParseColsetInvert(Fts5Parse * pParse,Fts5Colset * p)2001 Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){
2002   Fts5Colset *pRet;
2003   int nCol = pParse->pConfig->nCol;
2004 
2005   pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc,
2006       sizeof(Fts5Colset) + sizeof(int)*nCol
2007   );
2008   if( pRet ){
2009     int i;
2010     int iOld = 0;
2011     for(i=0; i<nCol; i++){
2012       if( iOld>=p->nCol || p->aiCol[iOld]!=i ){
2013         pRet->aiCol[pRet->nCol++] = i;
2014       }else{
2015         iOld++;
2016       }
2017     }
2018   }
2019 
2020   sqlite3_free(p);
2021   return pRet;
2022 }
2023 
sqlite3Fts5ParseColset(Fts5Parse * pParse,Fts5Colset * pColset,Fts5Token * p)2024 Fts5Colset *sqlite3Fts5ParseColset(
2025   Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
2026   Fts5Colset *pColset,            /* Existing colset object */
2027   Fts5Token *p
2028 ){
2029   Fts5Colset *pRet = 0;
2030   int iCol;
2031   char *z;                        /* Dequoted copy of token p */
2032 
2033   z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
2034   if( pParse->rc==SQLITE_OK ){
2035     Fts5Config *pConfig = pParse->pConfig;
2036     sqlite3Fts5Dequote(z);
2037     for(iCol=0; iCol<pConfig->nCol; iCol++){
2038       if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
2039     }
2040     if( iCol==pConfig->nCol ){
2041       sqlite3Fts5ParseError(pParse, "no such column: %s", z);
2042     }else{
2043       pRet = fts5ParseColset(pParse, pColset, iCol);
2044     }
2045     sqlite3_free(z);
2046   }
2047 
2048   if( pRet==0 ){
2049     assert( pParse->rc!=SQLITE_OK );
2050     sqlite3_free(pColset);
2051   }
2052 
2053   return pRet;
2054 }
2055 
2056 /*
2057 ** If argument pOrig is NULL, or if (*pRc) is set to anything other than
2058 ** SQLITE_OK when this function is called, NULL is returned.
2059 **
2060 ** Otherwise, a copy of (*pOrig) is made into memory obtained from
2061 ** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation
2062 ** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned.
2063 */
fts5CloneColset(int * pRc,Fts5Colset * pOrig)2064 static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){
2065   Fts5Colset *pRet;
2066   if( pOrig ){
2067     sqlite3_int64 nByte = sizeof(Fts5Colset) + (pOrig->nCol-1) * sizeof(int);
2068     pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte);
2069     if( pRet ){
2070       memcpy(pRet, pOrig, (size_t)nByte);
2071     }
2072   }else{
2073     pRet = 0;
2074   }
2075   return pRet;
2076 }
2077 
2078 /*
2079 ** Remove from colset pColset any columns that are not also in colset pMerge.
2080 */
fts5MergeColset(Fts5Colset * pColset,Fts5Colset * pMerge)2081 static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){
2082   int iIn = 0;          /* Next input in pColset */
2083   int iMerge = 0;       /* Next input in pMerge */
2084   int iOut = 0;         /* Next output slot in pColset */
2085 
2086   while( iIn<pColset->nCol && iMerge<pMerge->nCol ){
2087     int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge];
2088     if( iDiff==0 ){
2089       pColset->aiCol[iOut++] = pMerge->aiCol[iMerge];
2090       iMerge++;
2091       iIn++;
2092     }else if( iDiff>0 ){
2093       iMerge++;
2094     }else{
2095       iIn++;
2096     }
2097   }
2098   pColset->nCol = iOut;
2099 }
2100 
2101 /*
2102 ** Recursively apply colset pColset to expression node pNode and all of
2103 ** its decendents. If (*ppFree) is not NULL, it contains a spare copy
2104 ** of pColset. This function may use the spare copy and set (*ppFree) to
2105 ** zero, or it may create copies of pColset using fts5CloneColset().
2106 */
fts5ParseSetColset(Fts5Parse * pParse,Fts5ExprNode * pNode,Fts5Colset * pColset,Fts5Colset ** ppFree)2107 static void fts5ParseSetColset(
2108   Fts5Parse *pParse,
2109   Fts5ExprNode *pNode,
2110   Fts5Colset *pColset,
2111   Fts5Colset **ppFree
2112 ){
2113   if( pParse->rc==SQLITE_OK ){
2114     assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING
2115          || pNode->eType==FTS5_AND  || pNode->eType==FTS5_OR
2116          || pNode->eType==FTS5_NOT  || pNode->eType==FTS5_EOF
2117     );
2118     if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
2119       Fts5ExprNearset *pNear = pNode->pNear;
2120       if( pNear->pColset ){
2121         fts5MergeColset(pNear->pColset, pColset);
2122         if( pNear->pColset->nCol==0 ){
2123           pNode->eType = FTS5_EOF;
2124           pNode->xNext = 0;
2125         }
2126       }else if( *ppFree ){
2127         pNear->pColset = pColset;
2128         *ppFree = 0;
2129       }else{
2130         pNear->pColset = fts5CloneColset(&pParse->rc, pColset);
2131       }
2132     }else{
2133       int i;
2134       assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 );
2135       for(i=0; i<pNode->nChild; i++){
2136         fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree);
2137       }
2138     }
2139   }
2140 }
2141 
2142 /*
2143 ** Apply colset pColset to expression node pExpr and all of its descendents.
2144 */
sqlite3Fts5ParseSetColset(Fts5Parse * pParse,Fts5ExprNode * pExpr,Fts5Colset * pColset)2145 void sqlite3Fts5ParseSetColset(
2146   Fts5Parse *pParse,
2147   Fts5ExprNode *pExpr,
2148   Fts5Colset *pColset
2149 ){
2150   Fts5Colset *pFree = pColset;
2151   if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){
2152     sqlite3Fts5ParseError(pParse,
2153         "fts5: column queries are not supported (detail=none)"
2154     );
2155   }else{
2156     fts5ParseSetColset(pParse, pExpr, pColset, &pFree);
2157   }
2158   sqlite3_free(pFree);
2159 }
2160 
fts5ExprAssignXNext(Fts5ExprNode * pNode)2161 static void fts5ExprAssignXNext(Fts5ExprNode *pNode){
2162   switch( pNode->eType ){
2163     case FTS5_STRING: {
2164       Fts5ExprNearset *pNear = pNode->pNear;
2165       if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1
2166        && pNear->apPhrase[0]->aTerm[0].pSynonym==0
2167        && pNear->apPhrase[0]->aTerm[0].bFirst==0
2168       ){
2169         pNode->eType = FTS5_TERM;
2170         pNode->xNext = fts5ExprNodeNext_TERM;
2171       }else{
2172         pNode->xNext = fts5ExprNodeNext_STRING;
2173       }
2174       break;
2175     };
2176 
2177     case FTS5_OR: {
2178       pNode->xNext = fts5ExprNodeNext_OR;
2179       break;
2180     };
2181 
2182     case FTS5_AND: {
2183       pNode->xNext = fts5ExprNodeNext_AND;
2184       break;
2185     };
2186 
2187     default: assert( pNode->eType==FTS5_NOT ); {
2188       pNode->xNext = fts5ExprNodeNext_NOT;
2189       break;
2190     };
2191   }
2192 }
2193 
fts5ExprAddChildren(Fts5ExprNode * p,Fts5ExprNode * pSub)2194 static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
2195   if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){
2196     int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
2197     memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
2198     p->nChild += pSub->nChild;
2199     sqlite3_free(pSub);
2200   }else{
2201     p->apChild[p->nChild++] = pSub;
2202   }
2203 }
2204 
2205 /*
2206 ** This function is used when parsing LIKE or GLOB patterns against
2207 ** trigram indexes that specify either detail=column or detail=none.
2208 ** It converts a phrase:
2209 **
2210 **     abc + def + ghi
2211 **
2212 ** into an AND tree:
2213 **
2214 **     abc AND def AND ghi
2215 */
fts5ParsePhraseToAnd(Fts5Parse * pParse,Fts5ExprNearset * pNear)2216 static Fts5ExprNode *fts5ParsePhraseToAnd(
2217   Fts5Parse *pParse,
2218   Fts5ExprNearset *pNear
2219 ){
2220   int nTerm = pNear->apPhrase[0]->nTerm;
2221   int ii;
2222   int nByte;
2223   Fts5ExprNode *pRet;
2224 
2225   assert( pNear->nPhrase==1 );
2226   assert( pParse->bPhraseToAnd );
2227 
2228   nByte = sizeof(Fts5ExprNode) + nTerm*sizeof(Fts5ExprNode*);
2229   pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
2230   if( pRet ){
2231     pRet->eType = FTS5_AND;
2232     pRet->nChild = nTerm;
2233     fts5ExprAssignXNext(pRet);
2234     pParse->nPhrase--;
2235     for(ii=0; ii<nTerm; ii++){
2236       Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(
2237           &pParse->rc, sizeof(Fts5ExprPhrase)
2238       );
2239       if( pPhrase ){
2240         if( parseGrowPhraseArray(pParse) ){
2241           fts5ExprPhraseFree(pPhrase);
2242         }else{
2243           pParse->apPhrase[pParse->nPhrase++] = pPhrase;
2244           pPhrase->nTerm = 1;
2245           pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup(
2246               &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1
2247           );
2248           pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
2249               0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
2250           );
2251         }
2252       }
2253     }
2254 
2255     if( pParse->rc ){
2256       sqlite3Fts5ParseNodeFree(pRet);
2257       pRet = 0;
2258     }else{
2259       sqlite3Fts5ParseNearsetFree(pNear);
2260     }
2261   }
2262 
2263   return pRet;
2264 }
2265 
2266 /*
2267 ** Allocate and return a new expression object. If anything goes wrong (i.e.
2268 ** OOM error), leave an error code in pParse and return NULL.
2269 */
sqlite3Fts5ParseNode(Fts5Parse * pParse,int eType,Fts5ExprNode * pLeft,Fts5ExprNode * pRight,Fts5ExprNearset * pNear)2270 Fts5ExprNode *sqlite3Fts5ParseNode(
2271   Fts5Parse *pParse,              /* Parse context */
2272   int eType,                      /* FTS5_STRING, AND, OR or NOT */
2273   Fts5ExprNode *pLeft,            /* Left hand child expression */
2274   Fts5ExprNode *pRight,           /* Right hand child expression */
2275   Fts5ExprNearset *pNear          /* For STRING expressions, the near cluster */
2276 ){
2277   Fts5ExprNode *pRet = 0;
2278 
2279   if( pParse->rc==SQLITE_OK ){
2280     int nChild = 0;               /* Number of children of returned node */
2281     sqlite3_int64 nByte;          /* Bytes of space to allocate for this node */
2282 
2283     assert( (eType!=FTS5_STRING && !pNear)
2284          || (eType==FTS5_STRING && !pLeft && !pRight)
2285     );
2286     if( eType==FTS5_STRING && pNear==0 ) return 0;
2287     if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
2288     if( eType!=FTS5_STRING && pRight==0 ) return pLeft;
2289 
2290     if( eType==FTS5_STRING
2291      && pParse->bPhraseToAnd
2292      && pNear->apPhrase[0]->nTerm>1
2293     ){
2294       pRet = fts5ParsePhraseToAnd(pParse, pNear);
2295     }else{
2296       if( eType==FTS5_NOT ){
2297         nChild = 2;
2298       }else if( eType==FTS5_AND || eType==FTS5_OR ){
2299         nChild = 2;
2300         if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
2301         if( pRight->eType==eType ) nChild += pRight->nChild-1;
2302       }
2303 
2304       nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1);
2305       pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
2306 
2307       if( pRet ){
2308         pRet->eType = eType;
2309         pRet->pNear = pNear;
2310         fts5ExprAssignXNext(pRet);
2311         if( eType==FTS5_STRING ){
2312           int iPhrase;
2313           for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
2314             pNear->apPhrase[iPhrase]->pNode = pRet;
2315             if( pNear->apPhrase[iPhrase]->nTerm==0 ){
2316               pRet->xNext = 0;
2317               pRet->eType = FTS5_EOF;
2318             }
2319           }
2320 
2321           if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){
2322             Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
2323             if( pNear->nPhrase!=1
2324                 || pPhrase->nTerm>1
2325                 || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst)
2326               ){
2327               sqlite3Fts5ParseError(pParse,
2328                   "fts5: %s queries are not supported (detail!=full)",
2329                   pNear->nPhrase==1 ? "phrase": "NEAR"
2330               );
2331               sqlite3_free(pRet);
2332               pRet = 0;
2333             }
2334           }
2335         }else{
2336           fts5ExprAddChildren(pRet, pLeft);
2337           fts5ExprAddChildren(pRet, pRight);
2338         }
2339       }
2340     }
2341   }
2342 
2343   if( pRet==0 ){
2344     assert( pParse->rc!=SQLITE_OK );
2345     sqlite3Fts5ParseNodeFree(pLeft);
2346     sqlite3Fts5ParseNodeFree(pRight);
2347     sqlite3Fts5ParseNearsetFree(pNear);
2348   }
2349   return pRet;
2350 }
2351 
sqlite3Fts5ParseImplicitAnd(Fts5Parse * pParse,Fts5ExprNode * pLeft,Fts5ExprNode * pRight)2352 Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
2353   Fts5Parse *pParse,              /* Parse context */
2354   Fts5ExprNode *pLeft,            /* Left hand child expression */
2355   Fts5ExprNode *pRight            /* Right hand child expression */
2356 ){
2357   Fts5ExprNode *pRet = 0;
2358   Fts5ExprNode *pPrev;
2359 
2360   if( pParse->rc ){
2361     sqlite3Fts5ParseNodeFree(pLeft);
2362     sqlite3Fts5ParseNodeFree(pRight);
2363   }else{
2364 
2365     assert( pLeft->eType==FTS5_STRING
2366         || pLeft->eType==FTS5_TERM
2367         || pLeft->eType==FTS5_EOF
2368         || pLeft->eType==FTS5_AND
2369     );
2370     assert( pRight->eType==FTS5_STRING
2371         || pRight->eType==FTS5_TERM
2372         || pRight->eType==FTS5_EOF
2373     );
2374 
2375     if( pLeft->eType==FTS5_AND ){
2376       pPrev = pLeft->apChild[pLeft->nChild-1];
2377     }else{
2378       pPrev = pLeft;
2379     }
2380     assert( pPrev->eType==FTS5_STRING
2381         || pPrev->eType==FTS5_TERM
2382         || pPrev->eType==FTS5_EOF
2383         );
2384 
2385     if( pRight->eType==FTS5_EOF ){
2386       assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] );
2387       sqlite3Fts5ParseNodeFree(pRight);
2388       pRet = pLeft;
2389       pParse->nPhrase--;
2390     }
2391     else if( pPrev->eType==FTS5_EOF ){
2392       Fts5ExprPhrase **ap;
2393 
2394       if( pPrev==pLeft ){
2395         pRet = pRight;
2396       }else{
2397         pLeft->apChild[pLeft->nChild-1] = pRight;
2398         pRet = pLeft;
2399       }
2400 
2401       ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
2402       assert( ap[0]==pPrev->pNear->apPhrase[0] );
2403       memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
2404       pParse->nPhrase--;
2405 
2406       sqlite3Fts5ParseNodeFree(pPrev);
2407     }
2408     else{
2409       pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0);
2410     }
2411   }
2412 
2413   return pRet;
2414 }
2415 
2416 #ifdef SQLITE_TEST
fts5ExprTermPrint(Fts5ExprTerm * pTerm)2417 static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
2418   sqlite3_int64 nByte = 0;
2419   Fts5ExprTerm *p;
2420   char *zQuoted;
2421 
2422   /* Determine the maximum amount of space required. */
2423   for(p=pTerm; p; p=p->pSynonym){
2424     nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2;
2425   }
2426   zQuoted = sqlite3_malloc64(nByte);
2427 
2428   if( zQuoted ){
2429     int i = 0;
2430     for(p=pTerm; p; p=p->pSynonym){
2431       char *zIn = p->zTerm;
2432       zQuoted[i++] = '"';
2433       while( *zIn ){
2434         if( *zIn=='"' ) zQuoted[i++] = '"';
2435         zQuoted[i++] = *zIn++;
2436       }
2437       zQuoted[i++] = '"';
2438       if( p->pSynonym ) zQuoted[i++] = '|';
2439     }
2440     if( pTerm->bPrefix ){
2441       zQuoted[i++] = ' ';
2442       zQuoted[i++] = '*';
2443     }
2444     zQuoted[i++] = '\0';
2445   }
2446   return zQuoted;
2447 }
2448 
fts5PrintfAppend(char * zApp,const char * zFmt,...)2449 static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
2450   char *zNew;
2451   va_list ap;
2452   va_start(ap, zFmt);
2453   zNew = sqlite3_vmprintf(zFmt, ap);
2454   va_end(ap);
2455   if( zApp && zNew ){
2456     char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew);
2457     sqlite3_free(zNew);
2458     zNew = zNew2;
2459   }
2460   sqlite3_free(zApp);
2461   return zNew;
2462 }
2463 
2464 /*
2465 ** Compose a tcl-readable representation of expression pExpr. Return a
2466 ** pointer to a buffer containing that representation. It is the
2467 ** responsibility of the caller to at some point free the buffer using
2468 ** sqlite3_free().
2469 */
fts5ExprPrintTcl(Fts5Config * pConfig,const char * zNearsetCmd,Fts5ExprNode * pExpr)2470 static char *fts5ExprPrintTcl(
2471   Fts5Config *pConfig,
2472   const char *zNearsetCmd,
2473   Fts5ExprNode *pExpr
2474 ){
2475   char *zRet = 0;
2476   if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
2477     Fts5ExprNearset *pNear = pExpr->pNear;
2478     int i;
2479     int iTerm;
2480 
2481     zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
2482     if( zRet==0 ) return 0;
2483     if( pNear->pColset ){
2484       int *aiCol = pNear->pColset->aiCol;
2485       int nCol = pNear->pColset->nCol;
2486       if( nCol==1 ){
2487         zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
2488       }else{
2489         zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
2490         for(i=1; i<pNear->pColset->nCol; i++){
2491           zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
2492         }
2493         zRet = fts5PrintfAppend(zRet, "} ");
2494       }
2495       if( zRet==0 ) return 0;
2496     }
2497 
2498     if( pNear->nPhrase>1 ){
2499       zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
2500       if( zRet==0 ) return 0;
2501     }
2502 
2503     zRet = fts5PrintfAppend(zRet, "--");
2504     if( zRet==0 ) return 0;
2505 
2506     for(i=0; i<pNear->nPhrase; i++){
2507       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
2508 
2509       zRet = fts5PrintfAppend(zRet, " {");
2510       for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
2511         char *zTerm = pPhrase->aTerm[iTerm].zTerm;
2512         zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
2513         if( pPhrase->aTerm[iTerm].bPrefix ){
2514           zRet = fts5PrintfAppend(zRet, "*");
2515         }
2516       }
2517 
2518       if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
2519       if( zRet==0 ) return 0;
2520     }
2521 
2522   }else{
2523     char const *zOp = 0;
2524     int i;
2525     switch( pExpr->eType ){
2526       case FTS5_AND: zOp = "AND"; break;
2527       case FTS5_NOT: zOp = "NOT"; break;
2528       default:
2529         assert( pExpr->eType==FTS5_OR );
2530         zOp = "OR";
2531         break;
2532     }
2533 
2534     zRet = sqlite3_mprintf("%s", zOp);
2535     for(i=0; zRet && i<pExpr->nChild; i++){
2536       char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
2537       if( !z ){
2538         sqlite3_free(zRet);
2539         zRet = 0;
2540       }else{
2541         zRet = fts5PrintfAppend(zRet, " [%z]", z);
2542       }
2543     }
2544   }
2545 
2546   return zRet;
2547 }
2548 
fts5ExprPrint(Fts5Config * pConfig,Fts5ExprNode * pExpr)2549 static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
2550   char *zRet = 0;
2551   if( pExpr->eType==0 ){
2552     return sqlite3_mprintf("\"\"");
2553   }else
2554   if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
2555     Fts5ExprNearset *pNear = pExpr->pNear;
2556     int i;
2557     int iTerm;
2558 
2559     if( pNear->pColset ){
2560       int ii;
2561       Fts5Colset *pColset = pNear->pColset;
2562       if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{");
2563       for(ii=0; ii<pColset->nCol; ii++){
2564         zRet = fts5PrintfAppend(zRet, "%s%s",
2565             pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " "
2566         );
2567       }
2568       if( zRet ){
2569         zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : "");
2570       }
2571       if( zRet==0 ) return 0;
2572     }
2573 
2574     if( pNear->nPhrase>1 ){
2575       zRet = fts5PrintfAppend(zRet, "NEAR(");
2576       if( zRet==0 ) return 0;
2577     }
2578 
2579     for(i=0; i<pNear->nPhrase; i++){
2580       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
2581       if( i!=0 ){
2582         zRet = fts5PrintfAppend(zRet, " ");
2583         if( zRet==0 ) return 0;
2584       }
2585       for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
2586         char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
2587         if( zTerm ){
2588           zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
2589           sqlite3_free(zTerm);
2590         }
2591         if( zTerm==0 || zRet==0 ){
2592           sqlite3_free(zRet);
2593           return 0;
2594         }
2595       }
2596     }
2597 
2598     if( pNear->nPhrase>1 ){
2599       zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
2600       if( zRet==0 ) return 0;
2601     }
2602 
2603   }else{
2604     char const *zOp = 0;
2605     int i;
2606 
2607     switch( pExpr->eType ){
2608       case FTS5_AND: zOp = " AND "; break;
2609       case FTS5_NOT: zOp = " NOT "; break;
2610       default:
2611         assert( pExpr->eType==FTS5_OR );
2612         zOp = " OR ";
2613         break;
2614     }
2615 
2616     for(i=0; i<pExpr->nChild; i++){
2617       char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
2618       if( z==0 ){
2619         sqlite3_free(zRet);
2620         zRet = 0;
2621       }else{
2622         int e = pExpr->apChild[i]->eType;
2623         int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF);
2624         zRet = fts5PrintfAppend(zRet, "%s%s%z%s",
2625             (i==0 ? "" : zOp),
2626             (b?"(":""), z, (b?")":"")
2627         );
2628       }
2629       if( zRet==0 ) break;
2630     }
2631   }
2632 
2633   return zRet;
2634 }
2635 
2636 /*
2637 ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
2638 ** and fts5_expr_tcl() (bTcl!=0).
2639 */
fts5ExprFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal,int bTcl)2640 static void fts5ExprFunction(
2641   sqlite3_context *pCtx,          /* Function call context */
2642   int nArg,                       /* Number of args */
2643   sqlite3_value **apVal,          /* Function arguments */
2644   int bTcl
2645 ){
2646   Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
2647   sqlite3 *db = sqlite3_context_db_handle(pCtx);
2648   const char *zExpr = 0;
2649   char *zErr = 0;
2650   Fts5Expr *pExpr = 0;
2651   int rc;
2652   int i;
2653 
2654   const char **azConfig;          /* Array of arguments for Fts5Config */
2655   const char *zNearsetCmd = "nearset";
2656   int nConfig;                    /* Size of azConfig[] */
2657   Fts5Config *pConfig = 0;
2658   int iArg = 1;
2659 
2660   if( nArg<1 ){
2661     zErr = sqlite3_mprintf("wrong number of arguments to function %s",
2662         bTcl ? "fts5_expr_tcl" : "fts5_expr"
2663     );
2664     sqlite3_result_error(pCtx, zErr, -1);
2665     sqlite3_free(zErr);
2666     return;
2667   }
2668 
2669   if( bTcl && nArg>1 ){
2670     zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
2671     iArg = 2;
2672   }
2673 
2674   nConfig = 3 + (nArg-iArg);
2675   azConfig = (const char**)sqlite3_malloc64(sizeof(char*) * nConfig);
2676   if( azConfig==0 ){
2677     sqlite3_result_error_nomem(pCtx);
2678     return;
2679   }
2680   azConfig[0] = 0;
2681   azConfig[1] = "main";
2682   azConfig[2] = "tbl";
2683   for(i=3; iArg<nArg; iArg++){
2684     const char *z = (const char*)sqlite3_value_text(apVal[iArg]);
2685     azConfig[i++] = (z ? z : "");
2686   }
2687 
2688   zExpr = (const char*)sqlite3_value_text(apVal[0]);
2689   if( zExpr==0 ) zExpr = "";
2690 
2691   rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
2692   if( rc==SQLITE_OK ){
2693     rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
2694   }
2695   if( rc==SQLITE_OK ){
2696     char *zText;
2697     if( pExpr->pRoot->xNext==0 ){
2698       zText = sqlite3_mprintf("");
2699     }else if( bTcl ){
2700       zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
2701     }else{
2702       zText = fts5ExprPrint(pConfig, pExpr->pRoot);
2703     }
2704     if( zText==0 ){
2705       rc = SQLITE_NOMEM;
2706     }else{
2707       sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
2708       sqlite3_free(zText);
2709     }
2710   }
2711 
2712   if( rc!=SQLITE_OK ){
2713     if( zErr ){
2714       sqlite3_result_error(pCtx, zErr, -1);
2715       sqlite3_free(zErr);
2716     }else{
2717       sqlite3_result_error_code(pCtx, rc);
2718     }
2719   }
2720   sqlite3_free((void *)azConfig);
2721   sqlite3Fts5ConfigFree(pConfig);
2722   sqlite3Fts5ExprFree(pExpr);
2723 }
2724 
fts5ExprFunctionHr(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)2725 static void fts5ExprFunctionHr(
2726   sqlite3_context *pCtx,          /* Function call context */
2727   int nArg,                       /* Number of args */
2728   sqlite3_value **apVal           /* Function arguments */
2729 ){
2730   fts5ExprFunction(pCtx, nArg, apVal, 0);
2731 }
fts5ExprFunctionTcl(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)2732 static void fts5ExprFunctionTcl(
2733   sqlite3_context *pCtx,          /* Function call context */
2734   int nArg,                       /* Number of args */
2735   sqlite3_value **apVal           /* Function arguments */
2736 ){
2737   fts5ExprFunction(pCtx, nArg, apVal, 1);
2738 }
2739 
2740 /*
2741 ** The implementation of an SQLite user-defined-function that accepts a
2742 ** single integer as an argument. If the integer is an alpha-numeric
2743 ** unicode code point, 1 is returned. Otherwise 0.
2744 */
fts5ExprIsAlnum(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)2745 static void fts5ExprIsAlnum(
2746   sqlite3_context *pCtx,          /* Function call context */
2747   int nArg,                       /* Number of args */
2748   sqlite3_value **apVal           /* Function arguments */
2749 ){
2750   int iCode;
2751   u8 aArr[32];
2752   if( nArg!=1 ){
2753     sqlite3_result_error(pCtx,
2754         "wrong number of arguments to function fts5_isalnum", -1
2755     );
2756     return;
2757   }
2758   memset(aArr, 0, sizeof(aArr));
2759   sqlite3Fts5UnicodeCatParse("L*", aArr);
2760   sqlite3Fts5UnicodeCatParse("N*", aArr);
2761   sqlite3Fts5UnicodeCatParse("Co", aArr);
2762   iCode = sqlite3_value_int(apVal[0]);
2763   sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
2764 }
2765 
fts5ExprFold(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)2766 static void fts5ExprFold(
2767   sqlite3_context *pCtx,          /* Function call context */
2768   int nArg,                       /* Number of args */
2769   sqlite3_value **apVal           /* Function arguments */
2770 ){
2771   if( nArg!=1 && nArg!=2 ){
2772     sqlite3_result_error(pCtx,
2773         "wrong number of arguments to function fts5_fold", -1
2774     );
2775   }else{
2776     int iCode;
2777     int bRemoveDiacritics = 0;
2778     iCode = sqlite3_value_int(apVal[0]);
2779     if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
2780     sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
2781   }
2782 }
2783 #endif /* ifdef SQLITE_TEST */
2784 
2785 /*
2786 ** This is called during initialization to register the fts5_expr() scalar
2787 ** UDF with the SQLite handle passed as the only argument.
2788 */
sqlite3Fts5ExprInit(Fts5Global * pGlobal,sqlite3 * db)2789 int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
2790 #ifdef SQLITE_TEST
2791   struct Fts5ExprFunc {
2792     const char *z;
2793     void (*x)(sqlite3_context*,int,sqlite3_value**);
2794   } aFunc[] = {
2795     { "fts5_expr",     fts5ExprFunctionHr },
2796     { "fts5_expr_tcl", fts5ExprFunctionTcl },
2797     { "fts5_isalnum",  fts5ExprIsAlnum },
2798     { "fts5_fold",     fts5ExprFold },
2799   };
2800   int i;
2801   int rc = SQLITE_OK;
2802   void *pCtx = (void*)pGlobal;
2803 
2804   for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){
2805     struct Fts5ExprFunc *p = &aFunc[i];
2806     rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
2807   }
2808 #else
2809   int rc = SQLITE_OK;
2810   UNUSED_PARAM2(pGlobal,db);
2811 #endif
2812 
2813   /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and
2814   ** sqlite3Fts5ParserFallback() are unused */
2815 #ifndef NDEBUG
2816   (void)sqlite3Fts5ParserTrace;
2817 #endif
2818   (void)sqlite3Fts5ParserFallback;
2819 
2820   return rc;
2821 }
2822 
2823 /*
2824 ** Return the number of phrases in expression pExpr.
2825 */
sqlite3Fts5ExprPhraseCount(Fts5Expr * pExpr)2826 int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
2827   return (pExpr ? pExpr->nPhrase : 0);
2828 }
2829 
2830 /*
2831 ** Return the number of terms in the iPhrase'th phrase in pExpr.
2832 */
sqlite3Fts5ExprPhraseSize(Fts5Expr * pExpr,int iPhrase)2833 int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
2834   if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
2835   return pExpr->apExprPhrase[iPhrase]->nTerm;
2836 }
2837 
2838 /*
2839 ** This function is used to access the current position list for phrase
2840 ** iPhrase.
2841 */
sqlite3Fts5ExprPoslist(Fts5Expr * pExpr,int iPhrase,const u8 ** pa)2842 int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
2843   int nRet;
2844   Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
2845   Fts5ExprNode *pNode = pPhrase->pNode;
2846   if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
2847     *pa = pPhrase->poslist.p;
2848     nRet = pPhrase->poslist.n;
2849   }else{
2850     *pa = 0;
2851     nRet = 0;
2852   }
2853   return nRet;
2854 }
2855 
2856 struct Fts5PoslistPopulator {
2857   Fts5PoslistWriter writer;
2858   int bOk;                        /* True if ok to populate */
2859   int bMiss;
2860 };
2861 
2862 /*
2863 ** Clear the position lists associated with all phrases in the expression
2864 ** passed as the first argument. Argument bLive is true if the expression
2865 ** might be pointing to a real entry, otherwise it has just been reset.
2866 **
2867 ** At present this function is only used for detail=col and detail=none
2868 ** fts5 tables. This implies that all phrases must be at most 1 token
2869 ** in size, as phrase matches are not supported without detail=full.
2870 */
sqlite3Fts5ExprClearPoslists(Fts5Expr * pExpr,int bLive)2871 Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){
2872   Fts5PoslistPopulator *pRet;
2873   pRet = sqlite3_malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
2874   if( pRet ){
2875     int i;
2876     memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
2877     for(i=0; i<pExpr->nPhrase; i++){
2878       Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist;
2879       Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
2880       assert( pExpr->apExprPhrase[i]->nTerm<=1 );
2881       if( bLive &&
2882           (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof)
2883       ){
2884         pRet[i].bMiss = 1;
2885       }else{
2886         pBuf->n = 0;
2887       }
2888     }
2889   }
2890   return pRet;
2891 }
2892 
2893 struct Fts5ExprCtx {
2894   Fts5Expr *pExpr;
2895   Fts5PoslistPopulator *aPopulator;
2896   i64 iOff;
2897 };
2898 typedef struct Fts5ExprCtx Fts5ExprCtx;
2899 
2900 /*
2901 ** TODO: Make this more efficient!
2902 */
fts5ExprColsetTest(Fts5Colset * pColset,int iCol)2903 static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
2904   int i;
2905   for(i=0; i<pColset->nCol; i++){
2906     if( pColset->aiCol[i]==iCol ) return 1;
2907   }
2908   return 0;
2909 }
2910 
fts5ExprPopulatePoslistsCb(void * pCtx,int tflags,const char * pToken,int nToken,int iUnused1,int iUnused2)2911 static int fts5ExprPopulatePoslistsCb(
2912   void *pCtx,                /* Copy of 2nd argument to xTokenize() */
2913   int tflags,                /* Mask of FTS5_TOKEN_* flags */
2914   const char *pToken,        /* Pointer to buffer containing token */
2915   int nToken,                /* Size of token in bytes */
2916   int iUnused1,              /* Byte offset of token within input text */
2917   int iUnused2               /* Byte offset of end of token within input text */
2918 ){
2919   Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
2920   Fts5Expr *pExpr = p->pExpr;
2921   int i;
2922 
2923   UNUSED_PARAM2(iUnused1, iUnused2);
2924 
2925   if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
2926   if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
2927   for(i=0; i<pExpr->nPhrase; i++){
2928     Fts5ExprTerm *pTerm;
2929     if( p->aPopulator[i].bOk==0 ) continue;
2930     for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
2931       int nTerm = (int)strlen(pTerm->zTerm);
2932       if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))
2933        && memcmp(pTerm->zTerm, pToken, nTerm)==0
2934       ){
2935         int rc = sqlite3Fts5PoslistWriterAppend(
2936             &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
2937         );
2938         if( rc ) return rc;
2939         break;
2940       }
2941     }
2942   }
2943   return SQLITE_OK;
2944 }
2945 
sqlite3Fts5ExprPopulatePoslists(Fts5Config * pConfig,Fts5Expr * pExpr,Fts5PoslistPopulator * aPopulator,int iCol,const char * z,int n)2946 int sqlite3Fts5ExprPopulatePoslists(
2947   Fts5Config *pConfig,
2948   Fts5Expr *pExpr,
2949   Fts5PoslistPopulator *aPopulator,
2950   int iCol,
2951   const char *z, int n
2952 ){
2953   int i;
2954   Fts5ExprCtx sCtx;
2955   sCtx.pExpr = pExpr;
2956   sCtx.aPopulator = aPopulator;
2957   sCtx.iOff = (((i64)iCol) << 32) - 1;
2958 
2959   for(i=0; i<pExpr->nPhrase; i++){
2960     Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
2961     Fts5Colset *pColset = pNode->pNear->pColset;
2962     if( (pColset && 0==fts5ExprColsetTest(pColset, iCol))
2963      || aPopulator[i].bMiss
2964     ){
2965       aPopulator[i].bOk = 0;
2966     }else{
2967       aPopulator[i].bOk = 1;
2968     }
2969   }
2970 
2971   return sqlite3Fts5Tokenize(pConfig,
2972       FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
2973   );
2974 }
2975 
fts5ExprClearPoslists(Fts5ExprNode * pNode)2976 static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
2977   if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
2978     pNode->pNear->apPhrase[0]->poslist.n = 0;
2979   }else{
2980     int i;
2981     for(i=0; i<pNode->nChild; i++){
2982       fts5ExprClearPoslists(pNode->apChild[i]);
2983     }
2984   }
2985 }
2986 
fts5ExprCheckPoslists(Fts5ExprNode * pNode,i64 iRowid)2987 static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){
2988   pNode->iRowid = iRowid;
2989   pNode->bEof = 0;
2990   switch( pNode->eType ){
2991     case FTS5_TERM:
2992     case FTS5_STRING:
2993       return (pNode->pNear->apPhrase[0]->poslist.n>0);
2994 
2995     case FTS5_AND: {
2996       int i;
2997       for(i=0; i<pNode->nChild; i++){
2998         if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
2999           fts5ExprClearPoslists(pNode);
3000           return 0;
3001         }
3002       }
3003       break;
3004     }
3005 
3006     case FTS5_OR: {
3007       int i;
3008       int bRet = 0;
3009       for(i=0; i<pNode->nChild; i++){
3010         if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
3011           bRet = 1;
3012         }
3013       }
3014       return bRet;
3015     }
3016 
3017     default: {
3018       assert( pNode->eType==FTS5_NOT );
3019       if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
3020           || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
3021         ){
3022         fts5ExprClearPoslists(pNode);
3023         return 0;
3024       }
3025       break;
3026     }
3027   }
3028   return 1;
3029 }
3030 
sqlite3Fts5ExprCheckPoslists(Fts5Expr * pExpr,i64 iRowid)3031 void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
3032   fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
3033 }
3034 
3035 /*
3036 ** This function is only called for detail=columns tables.
3037 */
sqlite3Fts5ExprPhraseCollist(Fts5Expr * pExpr,int iPhrase,const u8 ** ppCollist,int * pnCollist)3038 int sqlite3Fts5ExprPhraseCollist(
3039   Fts5Expr *pExpr,
3040   int iPhrase,
3041   const u8 **ppCollist,
3042   int *pnCollist
3043 ){
3044   Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
3045   Fts5ExprNode *pNode = pPhrase->pNode;
3046   int rc = SQLITE_OK;
3047 
3048   assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
3049   assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3050 
3051   if( pNode->bEof==0
3052    && pNode->iRowid==pExpr->pRoot->iRowid
3053    && pPhrase->poslist.n>0
3054   ){
3055     Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
3056     if( pTerm->pSynonym ){
3057       Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1];
3058       rc = fts5ExprSynonymList(
3059           pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
3060       );
3061     }else{
3062       *ppCollist = pPhrase->aTerm[0].pIter->pData;
3063       *pnCollist = pPhrase->aTerm[0].pIter->nData;
3064     }
3065   }else{
3066     *ppCollist = 0;
3067     *pnCollist = 0;
3068   }
3069 
3070   return rc;
3071 }
3072