xref: /sqlite-3.40.0/ext/fts3/fts3_snippet.c (revision 35552744)
1 /*
2 ** 2009 Oct 23
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 */
13 
14 #include "fts3Int.h"
15 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
16 
17 #include <string.h>
18 #include <assert.h>
19 
20 #ifndef SQLITE_AMALGAMATION
21 typedef sqlite3_int64 i64;
22 #endif
23 
/*
** Directive characters recognised in the format string passed as the
** second argument to matchinfo(). The note beside each one gives the
** number of values that directive contributes to the output.
*/
#define FTS3_MATCHINFO_NPHRASE   'p'        /* 1 value */
#define FTS3_MATCHINFO_NCOL      'c'        /* 1 value */
#define FTS3_MATCHINFO_NDOC      'n'        /* 1 value */
#define FTS3_MATCHINFO_AVGLENGTH 'a'        /* nCol values */
#define FTS3_MATCHINFO_LENGTH    'l'        /* nCol values */
#define FTS3_MATCHINFO_LCS       's'        /* nCol values */
#define FTS3_MATCHINFO_HITS      'x'        /* 3*nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS     'y'        /* nCol*nPhrase values */
#define FTS3_MATCHINFO_LHITS_BM  'b'        /* nCol*nPhrase values */

/*
** Format string used as the default for the second argument to
** matchinfo().
*/
#define FTS3_MATCHINFO_DEFAULT   "pcx"
41 
42 
43 /*
44 ** Used as an fts3ExprIterate() context when loading phrase doclists to
45 ** Fts3Expr.aDoclist[]/nDoclist.
46 */
47 typedef struct LoadDoclistCtx LoadDoclistCtx;
48 struct LoadDoclistCtx {
49   Fts3Cursor *pCsr;               /* FTS3 Cursor */
50   int nPhrase;                    /* Number of phrases seen so far */
51   int nToken;                     /* Number of tokens seen so far */
52 };
53 
54 /*
55 ** The following types are used as part of the implementation of the
56 ** fts3BestSnippet() routine.
57 */
58 typedef struct SnippetIter SnippetIter;
59 typedef struct SnippetPhrase SnippetPhrase;
60 typedef struct SnippetFragment SnippetFragment;
61 
62 struct SnippetIter {
63   Fts3Cursor *pCsr;               /* Cursor snippet is being generated from */
64   int iCol;                       /* Extract snippet from this column */
65   int nSnippet;                   /* Requested snippet length (in tokens) */
66   int nPhrase;                    /* Number of phrases in query */
67   SnippetPhrase *aPhrase;         /* Array of size nPhrase */
68   int iCurrent;                   /* First token of current snippet */
69 };
70 
71 struct SnippetPhrase {
72   int nToken;                     /* Number of tokens in phrase */
73   char *pList;                    /* Pointer to start of phrase position list */
74   i64 iHead;                      /* Next value in position list */
75   char *pHead;                    /* Position list data following iHead */
76   i64 iTail;                      /* Next value in trailing position list */
77   char *pTail;                    /* Position list data following iTail */
78 };
79 
80 struct SnippetFragment {
81   int iCol;                       /* Column snippet is extracted from */
82   int iPos;                       /* Index of first token in snippet */
83   u64 covered;                    /* Mask of query phrases covered */
84   u64 hlmask;                     /* Mask of snippet terms to highlight */
85 };
86 
87 /*
88 ** This type is used as an fts3ExprIterate() context object while
89 ** accumulating the data returned by the matchinfo() function.
90 */
91 typedef struct MatchInfo MatchInfo;
92 struct MatchInfo {
93   Fts3Cursor *pCursor;            /* FTS3 Cursor */
94   int nCol;                       /* Number of columns in table */
95   int nPhrase;                    /* Number of matchable phrases in query */
96   sqlite3_int64 nDoc;             /* Number of docs in database */
97   char flag;
98   u32 *aMatchinfo;                /* Pre-allocated buffer */
99 };
100 
101 /*
102 ** An instance of this structure is used to manage a pair of buffers, each
103 ** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
104 ** for details.
105 */
106 struct MatchinfoBuffer {
107   u8 aRef[3];
108   int nElem;
109   int bGlobal;                    /* Set if global data is loaded */
110   char *zMatchinfo;
111   u32 aMatchinfo[1];
112 };
113 
114 
/*
** Growable text accumulator. The snippet() and offsets() functions both
** return text; they build it up piece by piece in one of these objects
** using fts3StringAppend().
*/
typedef struct StrBuffer {
  char *z;                        /* Buffer holding the string so far */
  int n;                          /* Bytes used in z (excl. nul-term) */
  int nAlloc;                     /* Bytes allocated at z */
} StrBuffer;
126 
127 
128 /*************************************************************************
129 ** Start of MatchinfoBuffer code.
130 */
131 
132 /*
133 ** Allocate a two-slot MatchinfoBuffer object.
134 */
fts3MIBufferNew(size_t nElem,const char * zMatchinfo)135 static MatchinfoBuffer *fts3MIBufferNew(size_t nElem, const char *zMatchinfo){
136   MatchinfoBuffer *pRet;
137   sqlite3_int64 nByte = sizeof(u32) * (2*(sqlite3_int64)nElem + 1)
138                            + sizeof(MatchinfoBuffer);
139   sqlite3_int64 nStr = strlen(zMatchinfo);
140 
141   pRet = sqlite3Fts3MallocZero(nByte + nStr+1);
142   if( pRet ){
143     pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet;
144     pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0]
145                                       + sizeof(u32)*((int)nElem+1);
146     pRet->nElem = (int)nElem;
147     pRet->zMatchinfo = ((char*)pRet) + nByte;
148     memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1);
149     pRet->aRef[0] = 1;
150   }
151 
152   return pRet;
153 }
154 
fts3MIBufferFree(void * p)155 static void fts3MIBufferFree(void *p){
156   MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]);
157 
158   assert( (u32*)p==&pBuf->aMatchinfo[1]
159        || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2]
160   );
161   if( (u32*)p==&pBuf->aMatchinfo[1] ){
162     pBuf->aRef[1] = 0;
163   }else{
164     pBuf->aRef[2] = 0;
165   }
166 
167   if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){
168     sqlite3_free(pBuf);
169   }
170 }
171 
/*
** Obtain an output buffer of p->nElem u32 values.
**
** If slot 1 or slot 2 of p is free it is marked busy and returned, and
** the function result is fts3MIBufferFree(). If both slots are busy a
** separate heap buffer is allocated instead and the function result is
** sqlite3_free(); when p->bGlobal is set, that buffer is seeded with the
** current contents of slot 1.
**
** *paOut receives the buffer. On allocation failure *paOut is set to 0
** and 0 is returned.
*/
static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
  void (*xDel)(void*) = 0;
  u32 *aBuf = 0;

  if( p->aRef[1] && p->aRef[2] ){
    /* Both built-in slots are in use - fall back to the heap. */
    aBuf = (u32*)sqlite3_malloc64(p->nElem * sizeof(u32));
    if( aBuf ){
      xDel = sqlite3_free;
      if( p->bGlobal ) memcpy(aBuf, &p->aMatchinfo[1], p->nElem*sizeof(u32));
    }
  }else{
    /* Slot 1 is preferred whenever it is free. */
    int iSlot = p->aRef[1] ? 2 : 1;
    p->aRef[iSlot] = 1;
    aBuf = (iSlot==1) ? &p->aMatchinfo[1] : &p->aMatchinfo[p->nElem+2];
    xDel = fts3MIBufferFree;
  }

  *paOut = aBuf;
  return xDel;
}
196 
/*
** Record that the global data is loaded (bGlobal) and duplicate the
** current contents of slot 1 into slot 2.
*/
static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
  u32 *aSlot1 = &p->aMatchinfo[1];
  u32 *aSlot2 = &p->aMatchinfo[2+p->nElem];

  p->bGlobal = 1;
  memcpy(aSlot2, aSlot1, p->nElem*sizeof(u32));
}
201 
202 /*
203 ** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
204 */
sqlite3Fts3MIBufferFree(MatchinfoBuffer * p)205 void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
206   if( p ){
207     assert( p->aRef[0]==1 );
208     p->aRef[0] = 0;
209     if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){
210       sqlite3_free(p);
211     }
212   }
213 }
214 
215 /*
216 ** End of MatchinfoBuffer code.
217 *************************************************************************/
218 
219 
220 /*
221 ** This function is used to help iterate through a position-list. A position
222 ** list is a list of unique integers, sorted from smallest to largest. Each
223 ** element of the list is represented by an FTS3 varint that takes the value
224 ** of the difference between the current element and the previous one plus
225 ** two. For example, to store the position-list:
226 **
227 **     4 9 113
228 **
229 ** the three varints:
230 **
231 **     6 7 106
232 **
233 ** are encoded.
234 **
235 ** When this function is called, *pp points to the start of an element of
236 ** the list. *piPos contains the value of the previous entry in the list.
237 ** After it returns, *piPos contains the value of the next element of the
238 ** list and *pp is advanced to the following varint.
239 */
fts3GetDeltaPosition(char ** pp,i64 * piPos)240 static void fts3GetDeltaPosition(char **pp, i64 *piPos){
241   int iVal;
242   *pp += fts3GetVarint32(*pp, &iVal);
243   *piPos += (iVal-2);
244 }
245 
246 /*
247 ** Helper function for fts3ExprIterate() (see below).
248 */
fts3ExprIterate2(Fts3Expr * pExpr,int * piPhrase,int (* x)(Fts3Expr *,int,void *),void * pCtx)249 static int fts3ExprIterate2(
250   Fts3Expr *pExpr,                /* Expression to iterate phrases of */
251   int *piPhrase,                  /* Pointer to phrase counter */
252   int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
253   void *pCtx                      /* Second argument to pass to callback */
254 ){
255   int rc;                         /* Return code */
256   int eType = pExpr->eType;     /* Type of expression node pExpr */
257 
258   if( eType!=FTSQUERY_PHRASE ){
259     assert( pExpr->pLeft && pExpr->pRight );
260     rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
261     if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
262       rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
263     }
264   }else{
265     rc = x(pExpr, *piPhrase, pCtx);
266     (*piPhrase)++;
267   }
268   return rc;
269 }
270 
271 /*
272 ** Iterate through all phrase nodes in an FTS3 query, except those that
273 ** are part of a sub-tree that is the right-hand-side of a NOT operator.
274 ** For each phrase node found, the supplied callback function is invoked.
275 **
276 ** If the callback function returns anything other than SQLITE_OK,
277 ** the iteration is abandoned and the error code returned immediately.
278 ** Otherwise, SQLITE_OK is returned after a callback has been made for
279 ** all eligible phrase nodes.
280 */
fts3ExprIterate(Fts3Expr * pExpr,int (* x)(Fts3Expr *,int,void *),void * pCtx)281 static int fts3ExprIterate(
282   Fts3Expr *pExpr,                /* Expression to iterate phrases of */
283   int (*x)(Fts3Expr*,int,void*),  /* Callback function to invoke for phrases */
284   void *pCtx                      /* Second argument to pass to callback */
285 ){
286   int iPhrase = 0;                /* Variable used as the phrase counter */
287   return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
288 }
289 
290 
291 /*
292 ** This is an fts3ExprIterate() callback used while loading the doclists
293 ** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
294 ** fts3ExprLoadDoclists().
295 */
fts3ExprLoadDoclistsCb(Fts3Expr * pExpr,int iPhrase,void * ctx)296 static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
297   int rc = SQLITE_OK;
298   Fts3Phrase *pPhrase = pExpr->pPhrase;
299   LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
300 
301   UNUSED_PARAMETER(iPhrase);
302 
303   p->nPhrase++;
304   p->nToken += pPhrase->nToken;
305 
306   return rc;
307 }
308 
309 /*
310 ** Load the doclists for each phrase in the query associated with FTS3 cursor
311 ** pCsr.
312 **
313 ** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
314 ** phrases in the expression (all phrases except those directly or
315 ** indirectly descended from the right-hand-side of a NOT operator). If
316 ** pnToken is not NULL, then it is set to the number of tokens in all
317 ** matchable phrases of the expression.
318 */
fts3ExprLoadDoclists(Fts3Cursor * pCsr,int * pnPhrase,int * pnToken)319 static int fts3ExprLoadDoclists(
320   Fts3Cursor *pCsr,               /* Fts3 cursor for current query */
321   int *pnPhrase,                  /* OUT: Number of phrases in query */
322   int *pnToken                    /* OUT: Number of tokens in query */
323 ){
324   int rc;                         /* Return Code */
325   LoadDoclistCtx sCtx = {0,0,0};  /* Context for fts3ExprIterate() */
326   sCtx.pCsr = pCsr;
327   rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
328   if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
329   if( pnToken ) *pnToken = sCtx.nToken;
330   return rc;
331 }
332 
/*
** fts3ExprIterate() callback used by fts3ExprPhraseCount(). Argument ctx
** points to an int counter, which is incremented. The phrase index is
** also stored in pExpr->iPhrase.
*/
static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
  int *pnPhrase = (int *)ctx;

  pExpr->iPhrase = iPhrase;
  *pnPhrase += 1;
  return SQLITE_OK;
}
/*
** Return the number of phrases fts3ExprIterate() visits in expression
** pExpr. As a side effect each visited phrase node has its iPhrase field
** set to its index.
*/
static int fts3ExprPhraseCount(Fts3Expr *pExpr){
  int nCount = 0;
  (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nCount);
  return nCount;
}
343 
344 /*
345 ** Advance the position list iterator specified by the first two
346 ** arguments so that it points to the first element with a value greater
347 ** than or equal to parameter iNext.
348 */
fts3SnippetAdvance(char ** ppIter,i64 * piIter,int iNext)349 static void fts3SnippetAdvance(char **ppIter, i64 *piIter, int iNext){
350   char *pIter = *ppIter;
351   if( pIter ){
352     i64 iIter = *piIter;
353 
354     while( iIter<iNext ){
355       if( 0==(*pIter & 0xFE) ){
356         iIter = -1;
357         pIter = 0;
358         break;
359       }
360       fts3GetDeltaPosition(&pIter, &iIter);
361     }
362 
363     *piIter = iIter;
364     *ppIter = pIter;
365   }
366 }
367 
368 /*
369 ** Advance the snippet iterator to the next candidate snippet.
370 */
fts3SnippetNextCandidate(SnippetIter * pIter)371 static int fts3SnippetNextCandidate(SnippetIter *pIter){
372   int i;                          /* Loop counter */
373 
374   if( pIter->iCurrent<0 ){
375     /* The SnippetIter object has just been initialized. The first snippet
376     ** candidate always starts at offset 0 (even if this candidate has a
377     ** score of 0.0).
378     */
379     pIter->iCurrent = 0;
380 
381     /* Advance the 'head' iterator of each phrase to the first offset that
382     ** is greater than or equal to (iNext+nSnippet).
383     */
384     for(i=0; i<pIter->nPhrase; i++){
385       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
386       fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
387     }
388   }else{
389     int iStart;
390     int iEnd = 0x7FFFFFFF;
391 
392     for(i=0; i<pIter->nPhrase; i++){
393       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
394       if( pPhrase->pHead && pPhrase->iHead<iEnd ){
395         iEnd = pPhrase->iHead;
396       }
397     }
398     if( iEnd==0x7FFFFFFF ){
399       return 1;
400     }
401 
402     pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
403     for(i=0; i<pIter->nPhrase; i++){
404       SnippetPhrase *pPhrase = &pIter->aPhrase[i];
405       fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
406       fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
407     }
408   }
409 
410   return 0;
411 }
412 
413 /*
414 ** Retrieve information about the current candidate snippet of snippet
415 ** iterator pIter.
416 */
fts3SnippetDetails(SnippetIter * pIter,u64 mCovered,int * piToken,int * piScore,u64 * pmCover,u64 * pmHighlight)417 static void fts3SnippetDetails(
418   SnippetIter *pIter,             /* Snippet iterator */
419   u64 mCovered,                   /* Bitmask of phrases already covered */
420   int *piToken,                   /* OUT: First token of proposed snippet */
421   int *piScore,                   /* OUT: "Score" for this snippet */
422   u64 *pmCover,                   /* OUT: Bitmask of phrases covered */
423   u64 *pmHighlight                /* OUT: Bitmask of terms to highlight */
424 ){
425   int iStart = pIter->iCurrent;   /* First token of snippet */
426   int iScore = 0;                 /* Score of this snippet */
427   int i;                          /* Loop counter */
428   u64 mCover = 0;                 /* Mask of phrases covered by this snippet */
429   u64 mHighlight = 0;             /* Mask of tokens to highlight in snippet */
430 
431   for(i=0; i<pIter->nPhrase; i++){
432     SnippetPhrase *pPhrase = &pIter->aPhrase[i];
433     if( pPhrase->pTail ){
434       char *pCsr = pPhrase->pTail;
435       i64 iCsr = pPhrase->iTail;
436 
437       while( iCsr<(iStart+pIter->nSnippet) && iCsr>=iStart ){
438         int j;
439         u64 mPhrase = (u64)1 << (i%64);
440         u64 mPos = (u64)1 << (iCsr - iStart);
441         assert( iCsr>=iStart && (iCsr - iStart)<=64 );
442         assert( i>=0 );
443         if( (mCover|mCovered)&mPhrase ){
444           iScore++;
445         }else{
446           iScore += 1000;
447         }
448         mCover |= mPhrase;
449 
450         for(j=0; j<pPhrase->nToken; j++){
451           mHighlight |= (mPos>>j);
452         }
453 
454         if( 0==(*pCsr & 0x0FE) ) break;
455         fts3GetDeltaPosition(&pCsr, &iCsr);
456       }
457     }
458   }
459 
460   /* Set the output variables before returning. */
461   *piToken = iStart;
462   *piScore = iScore;
463   *pmCover = mCover;
464   *pmHighlight = mHighlight;
465 }
466 
467 /*
468 ** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
469 ** Each invocation populates an element of the SnippetIter.aPhrase[] array.
470 */
fts3SnippetFindPositions(Fts3Expr * pExpr,int iPhrase,void * ctx)471 static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
472   SnippetIter *p = (SnippetIter *)ctx;
473   SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
474   char *pCsr;
475   int rc;
476 
477   pPhrase->nToken = pExpr->pPhrase->nToken;
478   rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
479   assert( rc==SQLITE_OK || pCsr==0 );
480   if( pCsr ){
481     i64 iFirst = 0;
482     pPhrase->pList = pCsr;
483     fts3GetDeltaPosition(&pCsr, &iFirst);
484     if( iFirst<0 ){
485       rc = FTS_CORRUPT_VTAB;
486     }else{
487       pPhrase->pHead = pCsr;
488       pPhrase->pTail = pCsr;
489       pPhrase->iHead = iFirst;
490       pPhrase->iTail = iFirst;
491     }
492   }else{
493     assert( rc!=SQLITE_OK || (
494        pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
495     ));
496   }
497 
498   return rc;
499 }
500 
501 /*
502 ** Select the fragment of text consisting of nFragment contiguous tokens
503 ** from column iCol that represent the "best" snippet. The best snippet
504 ** is the snippet with the highest score, where scores are calculated
505 ** by adding:
506 **
507 **   (a) +1 point for each occurrence of a matchable phrase in the snippet.
508 **
509 **   (b) +1000 points for the first occurrence of each matchable phrase in
510 **       the snippet for which the corresponding mCovered bit is not set.
511 **
512 ** The selected snippet parameters are stored in structure *pFragment before
513 ** returning. The score of the selected snippet is stored in *piScore
514 ** before returning.
515 */
fts3BestSnippet(int nSnippet,Fts3Cursor * pCsr,int iCol,u64 mCovered,u64 * pmSeen,SnippetFragment * pFragment,int * piScore)516 static int fts3BestSnippet(
517   int nSnippet,                   /* Desired snippet length */
518   Fts3Cursor *pCsr,               /* Cursor to create snippet for */
519   int iCol,                       /* Index of column to create snippet from */
520   u64 mCovered,                   /* Mask of phrases already covered */
521   u64 *pmSeen,                    /* IN/OUT: Mask of phrases seen */
522   SnippetFragment *pFragment,     /* OUT: Best snippet found */
523   int *piScore                    /* OUT: Score of snippet pFragment */
524 ){
525   int rc;                         /* Return Code */
526   int nList;                      /* Number of phrases in expression */
527   SnippetIter sIter;              /* Iterates through snippet candidates */
528   sqlite3_int64 nByte;            /* Number of bytes of space to allocate */
529   int iBestScore = -1;            /* Best snippet score found so far */
530   int i;                          /* Loop counter */
531 
532   memset(&sIter, 0, sizeof(sIter));
533 
534   /* Iterate through the phrases in the expression to count them. The same
535   ** callback makes sure the doclists are loaded for each phrase.
536   */
537   rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
538   if( rc!=SQLITE_OK ){
539     return rc;
540   }
541 
542   /* Now that it is known how many phrases there are, allocate and zero
543   ** the required space using malloc().
544   */
545   nByte = sizeof(SnippetPhrase) * nList;
546   sIter.aPhrase = (SnippetPhrase *)sqlite3Fts3MallocZero(nByte);
547   if( !sIter.aPhrase ){
548     return SQLITE_NOMEM;
549   }
550 
551   /* Initialize the contents of the SnippetIter object. Then iterate through
552   ** the set of phrases in the expression to populate the aPhrase[] array.
553   */
554   sIter.pCsr = pCsr;
555   sIter.iCol = iCol;
556   sIter.nSnippet = nSnippet;
557   sIter.nPhrase = nList;
558   sIter.iCurrent = -1;
559   rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
560   if( rc==SQLITE_OK ){
561 
562     /* Set the *pmSeen output variable. */
563     for(i=0; i<nList; i++){
564       if( sIter.aPhrase[i].pHead ){
565         *pmSeen |= (u64)1 << (i%64);
566       }
567     }
568 
569     /* Loop through all candidate snippets. Store the best snippet in
570      ** *pFragment. Store its associated 'score' in iBestScore.
571      */
572     pFragment->iCol = iCol;
573     while( !fts3SnippetNextCandidate(&sIter) ){
574       int iPos;
575       int iScore;
576       u64 mCover;
577       u64 mHighlite;
578       fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite);
579       assert( iScore>=0 );
580       if( iScore>iBestScore ){
581         pFragment->iPos = iPos;
582         pFragment->hlmask = mHighlite;
583         pFragment->covered = mCover;
584         iBestScore = iScore;
585       }
586     }
587 
588     *piScore = iBestScore;
589   }
590   sqlite3_free(sIter.aPhrase);
591   return rc;
592 }
593 
594 
595 /*
596 ** Append a string to the string-buffer passed as the first argument.
597 **
598 ** If nAppend is negative, then the length of the string zAppend is
599 ** determined using strlen().
600 */
fts3StringAppend(StrBuffer * pStr,const char * zAppend,int nAppend)601 static int fts3StringAppend(
602   StrBuffer *pStr,                /* Buffer to append to */
603   const char *zAppend,            /* Pointer to data to append to buffer */
604   int nAppend                     /* Size of zAppend in bytes (or -1) */
605 ){
606   if( nAppend<0 ){
607     nAppend = (int)strlen(zAppend);
608   }
609 
610   /* If there is insufficient space allocated at StrBuffer.z, use realloc()
611   ** to grow the buffer until so that it is big enough to accomadate the
612   ** appended data.
613   */
614   if( pStr->n+nAppend+1>=pStr->nAlloc ){
615     sqlite3_int64 nAlloc = pStr->nAlloc+(sqlite3_int64)nAppend+100;
616     char *zNew = sqlite3_realloc64(pStr->z, nAlloc);
617     if( !zNew ){
618       return SQLITE_NOMEM;
619     }
620     pStr->z = zNew;
621     pStr->nAlloc = nAlloc;
622   }
623   assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
624 
625   /* Append the data to the string buffer. */
626   memcpy(&pStr->z[pStr->n], zAppend, nAppend);
627   pStr->n += nAppend;
628   pStr->z[pStr->n] = '\0';
629 
630   return SQLITE_OK;
631 }
632 
633 /*
634 ** The fts3BestSnippet() function often selects snippets that end with a
635 ** query term. That is, the final term of the snippet is always a term
636 ** that requires highlighting. For example, if 'X' is a highlighted term
637 ** and '.' is a non-highlighted term, BestSnippet() may select:
638 **
639 **     ........X.....X
640 **
641 ** This function "shifts" the beginning of the snippet forward in the
642 ** document so that there are approximately the same number of
643 ** non-highlighted terms to the right of the final highlighted term as there
644 ** are to the left of the first highlighted term. For example, to this:
645 **
646 **     ....X.....X....
647 **
648 ** This is done as part of extracting the snippet text, not when selecting
649 ** the snippet. Snippet selection is done based on doclists only, so there
650 ** is no way for fts3BestSnippet() to know whether or not the document
651 ** actually contains terms that follow the final highlighted term.
652 */
fts3SnippetShift(Fts3Table * pTab,int iLangid,int nSnippet,const char * zDoc,int nDoc,int * piPos,u64 * pHlmask)653 static int fts3SnippetShift(
654   Fts3Table *pTab,                /* FTS3 table snippet comes from */
655   int iLangid,                    /* Language id to use in tokenizing */
656   int nSnippet,                   /* Number of tokens desired for snippet */
657   const char *zDoc,               /* Document text to extract snippet from */
658   int nDoc,                       /* Size of buffer zDoc in bytes */
659   int *piPos,                     /* IN/OUT: First token of snippet */
660   u64 *pHlmask                    /* IN/OUT: Mask of tokens to highlight */
661 ){
662   u64 hlmask = *pHlmask;          /* Local copy of initial highlight-mask */
663 
664   if( hlmask ){
665     int nLeft;                    /* Tokens to the left of first highlight */
666     int nRight;                   /* Tokens to the right of last highlight */
667     int nDesired;                 /* Ideal number of tokens to shift forward */
668 
669     for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
670     for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
671     assert( (nSnippet-1-nRight)<=63 && (nSnippet-1-nRight)>=0 );
672     nDesired = (nLeft-nRight)/2;
673 
674     /* Ideally, the start of the snippet should be pushed forward in the
675     ** document nDesired tokens. This block checks if there are actually
676     ** nDesired tokens to the right of the snippet. If so, *piPos and
677     ** *pHlMask are updated to shift the snippet nDesired tokens to the
678     ** right. Otherwise, the snippet is shifted by the number of tokens
679     ** available.
680     */
681     if( nDesired>0 ){
682       int nShift;                 /* Number of tokens to shift snippet by */
683       int iCurrent = 0;           /* Token counter */
684       int rc;                     /* Return Code */
685       sqlite3_tokenizer_module *pMod;
686       sqlite3_tokenizer_cursor *pC;
687       pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
688 
689       /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
690       ** or more tokens in zDoc/nDoc.
691       */
692       rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
693       if( rc!=SQLITE_OK ){
694         return rc;
695       }
696       while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
697         const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
698         rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
699       }
700       pMod->xClose(pC);
701       if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
702 
703       nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
704       assert( nShift<=nDesired );
705       if( nShift>0 ){
706         *piPos += nShift;
707         *pHlmask = hlmask >> nShift;
708       }
709     }
710   }
711   return SQLITE_OK;
712 }
713 
714 /*
715 ** Extract the snippet text for fragment pFragment from cursor pCsr and
716 ** append it to string buffer pOut.
717 */
fts3SnippetText(Fts3Cursor * pCsr,SnippetFragment * pFragment,int iFragment,int isLast,int nSnippet,const char * zOpen,const char * zClose,const char * zEllipsis,StrBuffer * pOut)718 static int fts3SnippetText(
719   Fts3Cursor *pCsr,               /* FTS3 Cursor */
720   SnippetFragment *pFragment,     /* Snippet to extract */
721   int iFragment,                  /* Fragment number */
722   int isLast,                     /* True for final fragment in snippet */
723   int nSnippet,                   /* Number of tokens in extracted snippet */
724   const char *zOpen,              /* String inserted before highlighted term */
725   const char *zClose,             /* String inserted after highlighted term */
726   const char *zEllipsis,          /* String inserted between snippets */
727   StrBuffer *pOut                 /* Write output here */
728 ){
729   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
730   int rc;                         /* Return code */
731   const char *zDoc;               /* Document text to extract snippet from */
732   int nDoc;                       /* Size of zDoc in bytes */
733   int iCurrent = 0;               /* Current token number of document */
734   int iEnd = 0;                   /* Byte offset of end of current token */
735   int isShiftDone = 0;            /* True after snippet is shifted */
736   int iPos = pFragment->iPos;     /* First token of snippet */
737   u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
738   int iCol = pFragment->iCol+1;   /* Query column to extract text from */
739   sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
740   sqlite3_tokenizer_cursor *pC;   /* Tokenizer cursor open on zDoc/nDoc */
741 
742   zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
743   if( zDoc==0 ){
744     if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
745       return SQLITE_NOMEM;
746     }
747     return SQLITE_OK;
748   }
749   nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
750 
751   /* Open a token cursor on the document. */
752   pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
753   rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
754   if( rc!=SQLITE_OK ){
755     return rc;
756   }
757 
758   while( rc==SQLITE_OK ){
759     const char *ZDUMMY;           /* Dummy argument used with tokenizer */
760     int DUMMY1 = -1;              /* Dummy argument used with tokenizer */
761     int iBegin = 0;               /* Offset in zDoc of start of token */
762     int iFin = 0;                 /* Offset in zDoc of end of token */
763     int isHighlight = 0;          /* True for highlighted terms */
764 
765     /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
766     ** in the FTS code the variable that the third argument to xNext points to
767     ** is initialized to zero before the first (*but not necessarily
768     ** subsequent*) call to xNext(). This is done for a particular application
769     ** that needs to know whether or not the tokenizer is being used for
770     ** snippet generation or for some other purpose.
771     **
772     ** Extreme care is required when writing code to depend on this
773     ** initialization. It is not a documented part of the tokenizer interface.
774     ** If a tokenizer is used directly by any code outside of FTS, this
775     ** convention might not be respected.  */
776     rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
777     if( rc!=SQLITE_OK ){
778       if( rc==SQLITE_DONE ){
779         /* Special case - the last token of the snippet is also the last token
780         ** of the column. Append any punctuation that occurred between the end
781         ** of the previous token and the end of the document to the output.
782         ** Then break out of the loop. */
783         rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
784       }
785       break;
786     }
787     if( iCurrent<iPos ){ continue; }
788 
789     if( !isShiftDone ){
790       int n = nDoc - iBegin;
791       rc = fts3SnippetShift(
792           pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
793       );
794       isShiftDone = 1;
795 
796       /* Now that the shift has been done, check if the initial "..." are
797       ** required. They are required if (a) this is not the first fragment,
798       ** or (b) this fragment does not begin at position 0 of its column.
799       */
800       if( rc==SQLITE_OK ){
801         if( iPos>0 || iFragment>0 ){
802           rc = fts3StringAppend(pOut, zEllipsis, -1);
803         }else if( iBegin ){
804           rc = fts3StringAppend(pOut, zDoc, iBegin);
805         }
806       }
807       if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
808     }
809 
810     if( iCurrent>=(iPos+nSnippet) ){
811       if( isLast ){
812         rc = fts3StringAppend(pOut, zEllipsis, -1);
813       }
814       break;
815     }
816 
817     /* Set isHighlight to true if this term should be highlighted. */
818     isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
819 
820     if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
821     if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
822     if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
823     if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
824 
825     iEnd = iFin;
826   }
827 
828   pMod->xClose(pC);
829   return rc;
830 }
831 
832 
/*
** Count the entries in a column-list, i.e. the delta-encoded list of term
** offsets that belongs to a single column of a single row.
**
** On entry, *ppCollist must point at the first byte of the first varint in
** the column-list (the varint holding the position of the first matching
** term in the column). On return, *ppCollist points at the byte that
** terminated the list: either the 0x00 that ends the whole position-list,
** or the 0x01 that precedes the column number of the next column.
**
** The return value is the number of varints (entries) in the column-list.
*/
static int fts3ColumnlistCount(char **ppCollist){
  char *p = *ppCollist;           /* Read cursor */
  int bInVarint = 0;              /* True if previous byte had bit 0x80 set */
  int nEntry = 0;                 /* Number of complete varints seen so far */

  /* A 0x00 or 0x01 byte terminates the list, but only when it starts a
  ** new varint. A byte that follows a continuation byte (0x80 set) is the
  ** tail of the current varint and is always consumed. */
  while( bInVarint || (*p & 0xFE)!=0 ){
    bInVarint = (*p & 0x80)!=0;
    p++;
    if( !bInVarint ) nEntry++;
  }

  *ppCollist = p;
  return nEntry;
}
860 
861 /*
862 ** This function gathers 'y' or 'b' data for a single phrase.
863 */
fts3ExprLHits(Fts3Expr * pExpr,MatchInfo * p)864 static int fts3ExprLHits(
865   Fts3Expr *pExpr,                /* Phrase expression node */
866   MatchInfo *p                    /* Matchinfo context */
867 ){
868   Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
869   int iStart;
870   Fts3Phrase *pPhrase = pExpr->pPhrase;
871   char *pIter = pPhrase->doclist.pList;
872   int iCol = 0;
873 
874   assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
875   if( p->flag==FTS3_MATCHINFO_LHITS ){
876     iStart = pExpr->iPhrase * p->nCol;
877   }else{
878     iStart = pExpr->iPhrase * ((p->nCol + 31) / 32);
879   }
880 
881   if( pIter ) while( 1 ){
882     int nHit = fts3ColumnlistCount(&pIter);
883     if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
884       if( p->flag==FTS3_MATCHINFO_LHITS ){
885         p->aMatchinfo[iStart + iCol] = (u32)nHit;
886       }else if( nHit ){
887         p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F));
888       }
889     }
890     assert( *pIter==0x00 || *pIter==0x01 );
891     if( *pIter!=0x01 ) break;
892     pIter++;
893     pIter += fts3GetVarint32(pIter, &iCol);
894     if( iCol>=p->nCol ) return FTS_CORRUPT_VTAB;
895   }
896   return SQLITE_OK;
897 }
898 
899 /*
900 ** Gather the results for matchinfo directives 'y' and 'b'.
901 */
fts3ExprLHitGather(Fts3Expr * pExpr,MatchInfo * p)902 static int fts3ExprLHitGather(
903   Fts3Expr *pExpr,
904   MatchInfo *p
905 ){
906   int rc = SQLITE_OK;
907   assert( (pExpr->pLeft==0)==(pExpr->pRight==0) );
908   if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
909     if( pExpr->pLeft ){
910       rc = fts3ExprLHitGather(pExpr->pLeft, p);
911       if( rc==SQLITE_OK ) rc = fts3ExprLHitGather(pExpr->pRight, p);
912     }else{
913       rc = fts3ExprLHits(pExpr, p);
914     }
915   }
916   return rc;
917 }
918 
919 /*
920 ** fts3ExprIterate() callback used to collect the "global" matchinfo stats
921 ** for a single query.
922 **
923 ** fts3ExprIterate() callback to load the 'global' elements of a
924 ** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
925 ** of the matchinfo array that are constant for all rows returned by the
926 ** current query.
927 **
928 ** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
929 ** function populates Matchinfo.aMatchinfo[] as follows:
930 **
931 **   for(iCol=0; iCol<nCol; iCol++){
932 **     aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
933 **     aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
934 **   }
935 **
936 ** where X is the number of matches for phrase iPhrase is column iCol of all
937 ** rows of the table. Y is the number of rows for which column iCol contains
938 ** at least one instance of phrase iPhrase.
939 **
940 ** If the phrase pExpr consists entirely of deferred tokens, then all X and
941 ** Y values are set to nDoc, where nDoc is the number of documents in the
942 ** file system. This is done because the full-text index doclist is required
943 ** to calculate these values properly, and the full-text index doclist is
944 ** not available for deferred tokens.
945 */
fts3ExprGlobalHitsCb(Fts3Expr * pExpr,int iPhrase,void * pCtx)946 static int fts3ExprGlobalHitsCb(
947   Fts3Expr *pExpr,                /* Phrase expression node */
948   int iPhrase,                    /* Phrase number (numbered from zero) */
949   void *pCtx                      /* Pointer to MatchInfo structure */
950 ){
951   MatchInfo *p = (MatchInfo *)pCtx;
952   return sqlite3Fts3EvalPhraseStats(
953       p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
954   );
955 }
956 
957 /*
958 ** fts3ExprIterate() callback used to collect the "local" part of the
959 ** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
960 ** array that are different for each row returned by the query.
961 */
fts3ExprLocalHitsCb(Fts3Expr * pExpr,int iPhrase,void * pCtx)962 static int fts3ExprLocalHitsCb(
963   Fts3Expr *pExpr,                /* Phrase expression node */
964   int iPhrase,                    /* Phrase number */
965   void *pCtx                      /* Pointer to MatchInfo structure */
966 ){
967   int rc = SQLITE_OK;
968   MatchInfo *p = (MatchInfo *)pCtx;
969   int iStart = iPhrase * p->nCol * 3;
970   int i;
971 
972   for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
973     char *pCsr;
974     rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
975     if( pCsr ){
976       p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
977     }else{
978       p->aMatchinfo[iStart+i*3] = 0;
979     }
980   }
981 
982   return rc;
983 }
984 
/*
** Check that cArg is a matchinfo() directive character that is valid for
** table pTab.
**
** 'p', 'c', 's', 'x', 'y' and 'b' are always accepted. 'n' and 'a' are
** accepted only if pTab->bFts4 is set, and 'l' only if pTab->bHasDocsize
** is set.
**
** Return SQLITE_OK if the directive is acceptable. Otherwise, write an
** error message to *pzErr via sqlite3Fts3ErrMsg() and return SQLITE_ERROR.
*/
static int fts3MatchinfoCheck(
  Fts3Table *pTab,
  char cArg,
  char **pzErr
){
  int bOk;                        /* True if cArg may be used with pTab */

  switch( cArg ){
    case FTS3_MATCHINFO_NPHRASE:
    case FTS3_MATCHINFO_NCOL:
    case FTS3_MATCHINFO_LCS:
    case FTS3_MATCHINFO_HITS:
    case FTS3_MATCHINFO_LHITS:
    case FTS3_MATCHINFO_LHITS_BM:
      bOk = 1;
      break;

    case FTS3_MATCHINFO_NDOC:
    case FTS3_MATCHINFO_AVGLENGTH:
      bOk = (pTab->bFts4!=0);
      break;

    case FTS3_MATCHINFO_LENGTH:
      bOk = (pTab->bHasDocsize!=0);
      break;

    default:
      bOk = 0;
      break;
  }

  if( bOk ) return SQLITE_OK;
  sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
  return SQLITE_ERROR;
}
1005 
/*
** Return the number of 32-bit integers that matchinfo() directive cArg
** contributes to the output array, given the column and phrase counts
** stored in pInfo:
**
**   'n', 'p', 'c'    1
**   'a', 'l', 's'    nCol
**   'y'              nCol * nPhrase
**   'b'              nPhrase * ((nCol+31)/32)
**   'x'              nCol * nPhrase * 3
**
** cArg must already have been validated; any character not listed in the
** first four rows is assumed (and asserted) to be 'x'.
*/
static size_t fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
  if( cArg==FTS3_MATCHINFO_NDOC
   || cArg==FTS3_MATCHINFO_NPHRASE
   || cArg==FTS3_MATCHINFO_NCOL
  ){
    return 1;
  }

  if( cArg==FTS3_MATCHINFO_AVGLENGTH
   || cArg==FTS3_MATCHINFO_LENGTH
   || cArg==FTS3_MATCHINFO_LCS
  ){
    return pInfo->nCol;
  }

  if( cArg==FTS3_MATCHINFO_LHITS ){
    return pInfo->nCol * pInfo->nPhrase;
  }

  if( cArg==FTS3_MATCHINFO_LHITS_BM ){
    return pInfo->nPhrase * ((pInfo->nCol + 31) / 32);
  }

  assert( cArg==FTS3_MATCHINFO_HITS );
  return pInfo->nCol * pInfo->nPhrase * 3;
}
1038 
/*
** Read the document count from the "doctotal" record of table pTab.
**
** If *ppStmt is NULL, a statement is obtained from
** sqlite3Fts3SelectDoctotal() and stored in *ppStmt; otherwise the
** statement already in *ppStmt is reused. Column 0 of the statement is
** read as a blob that begins with a varint holding the number of
** documents.
**
** On success SQLITE_OK is returned and:
**
**   *pnDoc  is set to the document count,
**   *paLen  (if paLen is not NULL) points at the first byte of the blob
**           following that varint,
**   *ppEnd  (if ppEnd is not NULL) points at the first byte past the end
**           of the blob.
**
** FTS_CORRUPT_VTAB is returned if the blob is NULL, if the document count
** is not positive, or if the varint extends past the end of the blob. Any
** error returned by sqlite3Fts3SelectDoctotal() is passed through.
*/
static int fts3MatchinfoSelectDoctotal(
  Fts3Table *pTab,
  sqlite3_stmt **ppStmt,
  sqlite3_int64 *pnDoc,
  const char **paLen,
  const char **ppEnd
){
  const char *aBlob;              /* Current read position within the blob */
  const char *pBlobEnd;           /* First byte past the end of the blob */
  sqlite3_int64 nRow;             /* Document count read from the blob */
  int nBlob;                      /* Size of the blob in bytes */

  if( *ppStmt==0 ){
    int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
    if( rc!=SQLITE_OK ) return rc;
  }
  assert( sqlite3_data_count(*ppStmt)==1 );

  nBlob = sqlite3_column_bytes(*ppStmt, 0);
  aBlob = sqlite3_column_blob(*ppStmt, 0);
  if( aBlob==0 ) return FTS_CORRUPT_VTAB;

  pBlobEnd = aBlob + nBlob;
  aBlob += sqlite3Fts3GetVarintBounded(aBlob, pBlobEnd, &nRow);
  if( nRow<=0 || aBlob>pBlobEnd ) return FTS_CORRUPT_VTAB;

  *pnDoc = nRow;
  if( paLen ) *paLen = aBlob;
  if( ppEnd ) *ppEnd = pBlobEnd;
  return SQLITE_OK;
}
1076 
1077 /*
1078 ** An instance of the following structure is used to store state while
1079 ** iterating through a multi-column position-list corresponding to the
1080 ** hits for a single phrase on a single row in order to calculate the
1081 ** values for a matchinfo() FTS3_MATCHINFO_LCS request.
1082 */
1083 typedef struct LcsIterator LcsIterator;
1084 struct LcsIterator {
1085   Fts3Expr *pExpr;                /* Pointer to phrase expression */
1086   int iPosOffset;                 /* Tokens count up to end of this phrase */
1087   char *pRead;                    /* Cursor used to iterate through aDoclist */
1088   int iPos;                       /* Current position */
1089 };
1090 
/*
** Sentinel value intended to mark an LCS iterator that has finished
** iterating through all offsets for all columns.
**
** The macro expands to a bare constant. It must not end in a semi-colon,
** otherwise it could not be used inside an expression.
**
** NOTE(review): earlier wording of this comment referred to a field named
** LcsIterator.iCol. Confirm which field, if any, is meant to hold this
** value.
*/
#define LCS_ITERATOR_FINISHED 0x7FFFFFFF
1096 
/*
** fts3ExprIterate() callback used by fts3MatchinfoLcs(). Argument pCtx is
** an array of LcsIterator objects indexed by phrase number. This function
** records phrase expression pExpr in the array slot for phrase iPhrase.
** It always returns SQLITE_OK.
*/
static int fts3MatchinfoLcsCb(
  Fts3Expr *pExpr,                /* Phrase expression node */
  int iPhrase,                    /* Phrase number (numbered from zero) */
  void *pCtx                      /* Pointer to array of LcsIterator */
){
  LcsIterator *pSlot = &((LcsIterator *)pCtx)[iPhrase];
  pSlot->pExpr = pExpr;
  return SQLITE_OK;
}
1106 
1107 /*
1108 ** Advance the iterator passed as an argument to the next position. Return
1109 ** 1 if the iterator is at EOF or if it now points to the start of the
1110 ** position list for the next column.
1111 */
fts3LcsIteratorAdvance(LcsIterator * pIter)1112 static int fts3LcsIteratorAdvance(LcsIterator *pIter){
1113   char *pRead;
1114   sqlite3_int64 iRead;
1115   int rc = 0;
1116 
1117   if( NEVER(pIter==0) ) return 1;
1118   pRead = pIter->pRead;
1119   pRead += sqlite3Fts3GetVarint(pRead, &iRead);
1120   if( iRead==0 || iRead==1 ){
1121     pRead = 0;
1122     rc = 1;
1123   }else{
1124     pIter->iPos += (int)(iRead-2);
1125   }
1126 
1127   pIter->pRead = pRead;
1128   return rc;
1129 }
1130 
1131 /*
1132 ** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
1133 **
1134 ** If the call is successful, the longest-common-substring lengths for each
1135 ** column are written into the first nCol elements of the pInfo->aMatchinfo[]
1136 ** array before returning. SQLITE_OK is returned in this case.
1137 **
1138 ** Otherwise, if an error occurs, an SQLite error code is returned and the
1139 ** data written to the first nCol elements of pInfo->aMatchinfo[] is
1140 ** undefined.
1141 */
fts3MatchinfoLcs(Fts3Cursor * pCsr,MatchInfo * pInfo)1142 static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
1143   LcsIterator *aIter;
1144   int i;
1145   int iCol;
1146   int nToken = 0;
1147   int rc = SQLITE_OK;
1148 
1149   /* Allocate and populate the array of LcsIterator objects. The array
1150   ** contains one element for each matchable phrase in the query.
1151   **/
1152   aIter = sqlite3Fts3MallocZero(sizeof(LcsIterator) * pCsr->nPhrase);
1153   if( !aIter ) return SQLITE_NOMEM;
1154   (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
1155 
1156   for(i=0; i<pInfo->nPhrase; i++){
1157     LcsIterator *pIter = &aIter[i];
1158     nToken -= pIter->pExpr->pPhrase->nToken;
1159     pIter->iPosOffset = nToken;
1160   }
1161 
1162   for(iCol=0; iCol<pInfo->nCol; iCol++){
1163     int nLcs = 0;                 /* LCS value for this column */
1164     int nLive = 0;                /* Number of iterators in aIter not at EOF */
1165 
1166     for(i=0; i<pInfo->nPhrase; i++){
1167       LcsIterator *pIt = &aIter[i];
1168       rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
1169       if( rc!=SQLITE_OK ) goto matchinfo_lcs_out;
1170       if( pIt->pRead ){
1171         pIt->iPos = pIt->iPosOffset;
1172         fts3LcsIteratorAdvance(pIt);
1173         if( pIt->pRead==0 ){
1174           rc = FTS_CORRUPT_VTAB;
1175           goto matchinfo_lcs_out;
1176         }
1177         nLive++;
1178       }
1179     }
1180 
1181     while( nLive>0 ){
1182       LcsIterator *pAdv = 0;      /* The iterator to advance by one position */
1183       int nThisLcs = 0;           /* LCS for the current iterator positions */
1184 
1185       for(i=0; i<pInfo->nPhrase; i++){
1186         LcsIterator *pIter = &aIter[i];
1187         if( pIter->pRead==0 ){
1188           /* This iterator is already at EOF for this column. */
1189           nThisLcs = 0;
1190         }else{
1191           if( pAdv==0 || pIter->iPos<pAdv->iPos ){
1192             pAdv = pIter;
1193           }
1194           if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
1195             nThisLcs++;
1196           }else{
1197             nThisLcs = 1;
1198           }
1199           if( nThisLcs>nLcs ) nLcs = nThisLcs;
1200         }
1201       }
1202       if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
1203     }
1204 
1205     pInfo->aMatchinfo[iCol] = nLcs;
1206   }
1207 
1208  matchinfo_lcs_out:
1209   sqlite3_free(aIter);
1210   return rc;
1211 }
1212 
1213 /*
1214 ** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
1215 ** be returned by the matchinfo() function. Argument zArg contains the
1216 ** format string passed as the second argument to matchinfo (or the
1217 ** default value "pcx" if no second argument was specified). The format
1218 ** string has already been validated and the pInfo->aMatchinfo[] array
1219 ** is guaranteed to be large enough for the output.
1220 **
1221 ** If bGlobal is true, then populate all fields of the matchinfo() output.
1222 ** If it is false, then assume that those fields that do not change between
1223 ** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
1224 ** have already been populated.
1225 **
1226 ** Return SQLITE_OK if successful, or an SQLite error code if an error
1227 ** occurs. If a value other than SQLITE_OK is returned, the state the
1228 ** pInfo->aMatchinfo[] buffer is left in is undefined.
1229 */
fts3MatchinfoValues(Fts3Cursor * pCsr,int bGlobal,MatchInfo * pInfo,const char * zArg)1230 static int fts3MatchinfoValues(
1231   Fts3Cursor *pCsr,               /* FTS3 cursor object */
1232   int bGlobal,                    /* True to grab the global stats */
1233   MatchInfo *pInfo,               /* Matchinfo context object */
1234   const char *zArg                /* Matchinfo format string */
1235 ){
1236   int rc = SQLITE_OK;
1237   int i;
1238   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1239   sqlite3_stmt *pSelect = 0;
1240 
1241   for(i=0; rc==SQLITE_OK && zArg[i]; i++){
1242     pInfo->flag = zArg[i];
1243     switch( zArg[i] ){
1244       case FTS3_MATCHINFO_NPHRASE:
1245         if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
1246         break;
1247 
1248       case FTS3_MATCHINFO_NCOL:
1249         if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
1250         break;
1251 
1252       case FTS3_MATCHINFO_NDOC:
1253         if( bGlobal ){
1254           sqlite3_int64 nDoc = 0;
1255           rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0, 0);
1256           pInfo->aMatchinfo[0] = (u32)nDoc;
1257         }
1258         break;
1259 
1260       case FTS3_MATCHINFO_AVGLENGTH:
1261         if( bGlobal ){
1262           sqlite3_int64 nDoc;     /* Number of rows in table */
1263           const char *a;          /* Aggregate column length array */
1264           const char *pEnd;       /* First byte past end of length array */
1265 
1266           rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a, &pEnd);
1267           if( rc==SQLITE_OK ){
1268             int iCol;
1269             for(iCol=0; iCol<pInfo->nCol; iCol++){
1270               u32 iVal;
1271               sqlite3_int64 nToken;
1272               a += sqlite3Fts3GetVarint(a, &nToken);
1273               if( a>pEnd ){
1274                 rc = SQLITE_CORRUPT_VTAB;
1275                 break;
1276               }
1277               iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
1278               pInfo->aMatchinfo[iCol] = iVal;
1279             }
1280           }
1281         }
1282         break;
1283 
1284       case FTS3_MATCHINFO_LENGTH: {
1285         sqlite3_stmt *pSelectDocsize = 0;
1286         rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
1287         if( rc==SQLITE_OK ){
1288           int iCol;
1289           const char *a = sqlite3_column_blob(pSelectDocsize, 0);
1290           const char *pEnd = a + sqlite3_column_bytes(pSelectDocsize, 0);
1291           for(iCol=0; iCol<pInfo->nCol; iCol++){
1292             sqlite3_int64 nToken;
1293             a += sqlite3Fts3GetVarintBounded(a, pEnd, &nToken);
1294             if( a>pEnd ){
1295               rc = SQLITE_CORRUPT_VTAB;
1296               break;
1297             }
1298             pInfo->aMatchinfo[iCol] = (u32)nToken;
1299           }
1300         }
1301         sqlite3_reset(pSelectDocsize);
1302         break;
1303       }
1304 
1305       case FTS3_MATCHINFO_LCS:
1306         rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1307         if( rc==SQLITE_OK ){
1308           rc = fts3MatchinfoLcs(pCsr, pInfo);
1309         }
1310         break;
1311 
1312       case FTS3_MATCHINFO_LHITS_BM:
1313       case FTS3_MATCHINFO_LHITS: {
1314         size_t nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
1315         memset(pInfo->aMatchinfo, 0, nZero);
1316         rc = fts3ExprLHitGather(pCsr->pExpr, pInfo);
1317         break;
1318       }
1319 
1320       default: {
1321         Fts3Expr *pExpr;
1322         assert( zArg[i]==FTS3_MATCHINFO_HITS );
1323         pExpr = pCsr->pExpr;
1324         rc = fts3ExprLoadDoclists(pCsr, 0, 0);
1325         if( rc!=SQLITE_OK ) break;
1326         if( bGlobal ){
1327           if( pCsr->pDeferred ){
1328             rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc,0,0);
1329             if( rc!=SQLITE_OK ) break;
1330           }
1331           rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
1332           sqlite3Fts3EvalTestDeferred(pCsr, &rc);
1333           if( rc!=SQLITE_OK ) break;
1334         }
1335         (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
1336         break;
1337       }
1338     }
1339 
1340     pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
1341   }
1342 
1343   sqlite3_reset(pSelect);
1344   return rc;
1345 }
1346 
1347 
1348 /*
1349 ** Populate pCsr->aMatchinfo[] with data for the current row. The
1350 ** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
1351 */
fts3GetMatchinfo(sqlite3_context * pCtx,Fts3Cursor * pCsr,const char * zArg)1352 static void fts3GetMatchinfo(
1353   sqlite3_context *pCtx,        /* Return results here */
1354   Fts3Cursor *pCsr,               /* FTS3 Cursor object */
1355   const char *zArg                /* Second argument to matchinfo() function */
1356 ){
1357   MatchInfo sInfo;
1358   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1359   int rc = SQLITE_OK;
1360   int bGlobal = 0;                /* Collect 'global' stats as well as local */
1361 
1362   u32 *aOut = 0;
1363   void (*xDestroyOut)(void*) = 0;
1364 
1365   memset(&sInfo, 0, sizeof(MatchInfo));
1366   sInfo.pCursor = pCsr;
1367   sInfo.nCol = pTab->nColumn;
1368 
1369   /* If there is cached matchinfo() data, but the format string for the
1370   ** cache does not match the format string for this request, discard
1371   ** the cached data. */
1372   if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){
1373     sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
1374     pCsr->pMIBuffer = 0;
1375   }
1376 
1377   /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
1378   ** matchinfo function has been called for this query. In this case
1379   ** allocate the array used to accumulate the matchinfo data and
1380   ** initialize those elements that are constant for every row.
1381   */
1382   if( pCsr->pMIBuffer==0 ){
1383     size_t nMatchinfo = 0;        /* Number of u32 elements in match-info */
1384     int i;                        /* Used to iterate through zArg */
1385 
1386     /* Determine the number of phrases in the query */
1387     pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
1388     sInfo.nPhrase = pCsr->nPhrase;
1389 
1390     /* Determine the number of integers in the buffer returned by this call. */
1391     for(i=0; zArg[i]; i++){
1392       char *zErr = 0;
1393       if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
1394         sqlite3_result_error(pCtx, zErr, -1);
1395         sqlite3_free(zErr);
1396         return;
1397       }
1398       nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
1399     }
1400 
1401     /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
1402     pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg);
1403     if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM;
1404 
1405     pCsr->isMatchinfoNeeded = 1;
1406     bGlobal = 1;
1407   }
1408 
1409   if( rc==SQLITE_OK ){
1410     xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut);
1411     if( xDestroyOut==0 ){
1412       rc = SQLITE_NOMEM;
1413     }
1414   }
1415 
1416   if( rc==SQLITE_OK ){
1417     sInfo.aMatchinfo = aOut;
1418     sInfo.nPhrase = pCsr->nPhrase;
1419     rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
1420     if( bGlobal ){
1421       fts3MIBufferSetGlobal(pCsr->pMIBuffer);
1422     }
1423   }
1424 
1425   if( rc!=SQLITE_OK ){
1426     sqlite3_result_error_code(pCtx, rc);
1427     if( xDestroyOut ) xDestroyOut(aOut);
1428   }else{
1429     int n = pCsr->pMIBuffer->nElem * sizeof(u32);
1430     sqlite3_result_blob(pCtx, aOut, n, xDestroyOut);
1431   }
1432 }
1433 
1434 /*
1435 ** Implementation of snippet() function.
1436 */
sqlite3Fts3Snippet(sqlite3_context * pCtx,Fts3Cursor * pCsr,const char * zStart,const char * zEnd,const char * zEllipsis,int iCol,int nToken)1437 void sqlite3Fts3Snippet(
1438   sqlite3_context *pCtx,          /* SQLite function call context */
1439   Fts3Cursor *pCsr,               /* Cursor object */
1440   const char *zStart,             /* Snippet start text - "<b>" */
1441   const char *zEnd,               /* Snippet end text - "</b>" */
1442   const char *zEllipsis,          /* Snippet ellipsis text - "<b>...</b>" */
1443   int iCol,                       /* Extract snippet from this column */
1444   int nToken                      /* Approximate number of tokens in snippet */
1445 ){
1446   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1447   int rc = SQLITE_OK;
1448   int i;
1449   StrBuffer res = {0, 0, 0};
1450 
1451   /* The returned text includes up to four fragments of text extracted from
1452   ** the data in the current row. The first iteration of the for(...) loop
1453   ** below attempts to locate a single fragment of text nToken tokens in
1454   ** size that contains at least one instance of all phrases in the query
1455   ** expression that appear in the current row. If such a fragment of text
1456   ** cannot be found, the second iteration of the loop attempts to locate
1457   ** a pair of fragments, and so on.
1458   */
1459   int nSnippet = 0;               /* Number of fragments in this snippet */
1460   SnippetFragment aSnippet[4];    /* Maximum of 4 fragments per snippet */
1461   int nFToken = -1;               /* Number of tokens in each fragment */
1462 
1463   if( !pCsr->pExpr ){
1464     sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
1465     return;
1466   }
1467 
1468   /* Limit the snippet length to 64 tokens. */
1469   if( nToken<-64 ) nToken = -64;
1470   if( nToken>+64 ) nToken = +64;
1471 
1472   for(nSnippet=1; 1; nSnippet++){
1473 
1474     int iSnip;                    /* Loop counter 0..nSnippet-1 */
1475     u64 mCovered = 0;             /* Bitmask of phrases covered by snippet */
1476     u64 mSeen = 0;                /* Bitmask of phrases seen by BestSnippet() */
1477 
1478     if( nToken>=0 ){
1479       nFToken = (nToken+nSnippet-1) / nSnippet;
1480     }else{
1481       nFToken = -1 * nToken;
1482     }
1483 
1484     for(iSnip=0; iSnip<nSnippet; iSnip++){
1485       int iBestScore = -1;        /* Best score of columns checked so far */
1486       int iRead;                  /* Used to iterate through columns */
1487       SnippetFragment *pFragment = &aSnippet[iSnip];
1488 
1489       memset(pFragment, 0, sizeof(*pFragment));
1490 
1491       /* Loop through all columns of the table being considered for snippets.
1492       ** If the iCol argument to this function was negative, this means all
1493       ** columns of the FTS3 table. Otherwise, only column iCol is considered.
1494       */
1495       for(iRead=0; iRead<pTab->nColumn; iRead++){
1496         SnippetFragment sF = {0, 0, 0, 0};
1497         int iS = 0;
1498         if( iCol>=0 && iRead!=iCol ) continue;
1499 
1500         /* Find the best snippet of nFToken tokens in column iRead. */
1501         rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
1502         if( rc!=SQLITE_OK ){
1503           goto snippet_out;
1504         }
1505         if( iS>iBestScore ){
1506           *pFragment = sF;
1507           iBestScore = iS;
1508         }
1509       }
1510 
1511       mCovered |= pFragment->covered;
1512     }
1513 
1514     /* If all query phrases seen by fts3BestSnippet() are present in at least
1515     ** one of the nSnippet snippet fragments, break out of the loop.
1516     */
1517     assert( (mCovered&mSeen)==mCovered );
1518     if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
1519   }
1520 
1521   assert( nFToken>0 );
1522 
1523   for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
1524     rc = fts3SnippetText(pCsr, &aSnippet[i],
1525         i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
1526     );
1527   }
1528 
1529  snippet_out:
1530   sqlite3Fts3SegmentsClose(pTab);
1531   if( rc!=SQLITE_OK ){
1532     sqlite3_result_error_code(pCtx, rc);
1533     sqlite3_free(res.z);
1534   }else{
1535     sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
1536   }
1537 }
1538 
1539 
1540 typedef struct TermOffset TermOffset;
1541 typedef struct TermOffsetCtx TermOffsetCtx;
1542 
1543 struct TermOffset {
1544   char *pList;                    /* Position-list */
1545   i64 iPos;                       /* Position just read from pList */
1546   i64 iOff;                       /* Offset of this term from read positions */
1547 };
1548 
1549 struct TermOffsetCtx {
1550   Fts3Cursor *pCsr;
1551   int iCol;                       /* Column of table to populate aTerm for */
1552   int iTerm;
1553   sqlite3_int64 iDocid;
1554   TermOffset *aTerm;
1555 };
1556 
1557 /*
1558 ** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
1559 */
fts3ExprTermOffsetInit(Fts3Expr * pExpr,int iPhrase,void * ctx)1560 static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
1561   TermOffsetCtx *p = (TermOffsetCtx *)ctx;
1562   int nTerm;                      /* Number of tokens in phrase */
1563   int iTerm;                      /* For looping through nTerm phrase terms */
1564   char *pList;                    /* Pointer to position list for phrase */
1565   i64 iPos = 0;                   /* First position in position-list */
1566   int rc;
1567 
1568   UNUSED_PARAMETER(iPhrase);
1569   rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
1570   nTerm = pExpr->pPhrase->nToken;
1571   if( pList ){
1572     fts3GetDeltaPosition(&pList, &iPos);
1573     assert_fts3_nc( iPos>=0 );
1574   }
1575 
1576   for(iTerm=0; iTerm<nTerm; iTerm++){
1577     TermOffset *pT = &p->aTerm[p->iTerm++];
1578     pT->iOff = nTerm-iTerm-1;
1579     pT->pList = pList;
1580     pT->iPos = iPos;
1581   }
1582 
1583   return rc;
1584 }
1585 
1586 /*
1587 ** Implementation of offsets() function.
1588 */
/*
** Compute the value of the offsets() SQL function for the row that cursor
** pCsr refers to, and store it as the result of pCtx.
**
** If the cursor has no query expression the result is an empty string.
** Otherwise, for each table column the column text is re-tokenized and,
** for each term position recorded in the term iterators, one entry of the
** form "<column> <term> <byte-offset> <byte-length> " is appended to the
** result string.
**
** Any error code produced along the way (SQLITE_NOMEM, FTS_CORRUPT_VTAB,
** or whatever the helper routines return) is reported to the caller via
** sqlite3_result_error_code() and no text result is set.
*/
void sqlite3Fts3Offsets(
  sqlite3_context *pCtx,          /* SQLite function call context */
  Fts3Cursor *pCsr                /* Cursor object */
){
  Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
  sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
  int rc;                         /* Return Code */
  int nToken;                     /* Number of tokens in query */
  int iCol;                       /* Column currently being processed */
  StrBuffer res = {0, 0, 0};      /* Result string */
  TermOffsetCtx sCtx;             /* Context for fts3ExprTermOffsetInit() */

  if( !pCsr->pExpr ){
    sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
    return;
  }

  memset(&sCtx, 0, sizeof(sCtx));
  assert( pCsr->isRequireSeek==0 );

  /* Count the number of terms in the query */
  rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
  if( rc!=SQLITE_OK ) goto offsets_out;

  /* Allocate the array of TermOffset iterators. */
  sCtx.aTerm = (TermOffset *)sqlite3Fts3MallocZero(sizeof(TermOffset)*nToken);
  if( 0==sCtx.aTerm ){
    rc = SQLITE_NOMEM;
    goto offsets_out;
  }
  sCtx.iDocid = pCsr->iPrevId;
  sCtx.pCsr = pCsr;

  /* Loop through the table columns, appending offset information to
  ** string-buffer res for each column.
  */
  for(iCol=0; iCol<pTab->nColumn; iCol++){
    sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
    const char *ZDUMMY;           /* Dummy argument used with xNext() */
    int NDUMMY = 0;               /* Dummy argument used with xNext() */
    int iStart = 0;
    int iEnd = 0;
    int iCurrent = 0;
    const char *zDoc;
    int nDoc;

    /* Initialize the contents of sCtx.aTerm[] for column iCol. This
    ** operation may fail if the database contains corrupt records.
    */
    sCtx.iCol = iCol;
    sCtx.iTerm = 0;
    rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
    if( rc!=SQLITE_OK ) goto offsets_out;

    /* Retrieve the text stored in column iCol. If an SQL NULL is stored
    ** in column iCol, jump immediately to the next iteration of the loop.
    ** If an OOM occurs while retrieving the data (this can happen if SQLite
    ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
    ** to the caller.
    */
    zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
    nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
    if( zDoc==0 ){
      if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
        continue;
      }
      rc = SQLITE_NOMEM;
      goto offsets_out;
    }

    /* Initialize a tokenizer iterator to iterate through column iCol. */
    rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
        zDoc, nDoc, &pC
    );
    if( rc!=SQLITE_OK ) goto offsets_out;

    rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
    while( rc==SQLITE_OK ){
      int i;                      /* Used to loop through terms */
      int iMinPos = 0x7FFFFFFF;   /* Position of next token */
      TermOffset *pTerm = 0;      /* TermOffset associated with next token */

      /* Find the term iterator with the smallest pending position. Iterators
      ** whose pList is NULL are exhausted for this column and are skipped.
      */
      for(i=0; i<nToken; i++){
        TermOffset *pT = &sCtx.aTerm[i];
        if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
          iMinPos = pT->iPos-pT->iOff;
          pTerm = pT;
        }
      }

      if( !pTerm ){
        /* All offsets for this column have been gathered. */
        rc = SQLITE_DONE;
      }else{
        assert_fts3_nc( iCurrent<=iMinPos );

        /* Advance the selected iterator. A next byte of 0x00 or 0x01 means
        ** there are no more positions for this term in the current column
        ** (presumably the end-of-list and column-switch markers of the
        ** position-list encoding), so the iterator is retired.
        */
        if( 0==(0xFE&*pTerm->pList) ){
          pTerm->pList = 0;
        }else{
          fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
        }

        /* Step the tokenizer forward until it reaches token iMinPos. */
        while( rc==SQLITE_OK && iCurrent<iMinPos ){
          rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
        }
        if( rc==SQLITE_OK ){
          char aBuffer[64];
          /* The term index is a pointer difference (ptrdiff_t). Cast it to
          ** int so that the argument type matches the %d conversion; the
          ** value is always less than nToken, which is itself an int.
          */
          sqlite3_snprintf(sizeof(aBuffer), aBuffer,
              "%d %d %d %d ", iCol, (int)(pTerm-sCtx.aTerm), iStart,
              iEnd-iStart
          );
          rc = fts3StringAppend(&res, aBuffer, -1);
        }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
          /* The tokenizer ran out of tokens before reaching a position that
          ** the index says exists. When zContentTbl is not set this is
          ** reported as corruption; otherwise the SQLITE_DONE is converted
          ** to SQLITE_OK below.
          */
          rc = FTS_CORRUPT_VTAB;
        }
      }
    }
    if( rc==SQLITE_DONE ){
      rc = SQLITE_OK;
    }

    pMod->xClose(pC);
    if( rc!=SQLITE_OK ) goto offsets_out;
  }

 offsets_out:
  sqlite3_free(sCtx.aTerm);
  assert( rc!=SQLITE_DONE );
  sqlite3Fts3SegmentsClose(pTab);
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx,  rc);
    sqlite3_free(res.z);
  }else{
    /* Each appended entry ends with a space; passing res.n-1 as the length
    ** leaves the final one out. Ownership of res.z passes to SQLite, which
    ** releases it with sqlite3_free().
    */
    sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
  }
  return;
}
1723 
1724 /*
1725 ** Implementation of matchinfo() function.
1726 */
sqlite3Fts3Matchinfo(sqlite3_context * pContext,Fts3Cursor * pCsr,const char * zArg)1727 void sqlite3Fts3Matchinfo(
1728   sqlite3_context *pContext,      /* Function call context */
1729   Fts3Cursor *pCsr,               /* FTS3 table cursor */
1730   const char *zArg                /* Second arg to matchinfo() function */
1731 ){
1732   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
1733   const char *zFormat;
1734 
1735   if( zArg ){
1736     zFormat = zArg;
1737   }else{
1738     zFormat = FTS3_MATCHINFO_DEFAULT;
1739   }
1740 
1741   if( !pCsr->pExpr ){
1742     sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
1743     return;
1744   }else{
1745     /* Retrieve matchinfo() data. */
1746     fts3GetMatchinfo(pContext, pCsr, zFormat);
1747     sqlite3Fts3SegmentsClose(pTab);
1748   }
1749 }
1750 
1751 #endif
1752