xref: /sqlite-3.40.0/ext/fts5/fts5_test_mi.c (revision 2d77d80a)
1 /*
2 ** 2015 Aug 04
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains test code only, it is not included in release
14 ** versions of FTS5. It contains the implementation of an FTS5 auxiliary
15 ** function very similar to the FTS4 function matchinfo():
16 **
17 **     https://www.sqlite.org/fts3.html#matchinfo
18 **
19 ** Known differences are that:
20 **
21 **  1) this function uses the FTS5 definition of "matchable phrase", which
22 **     excludes any phrases that are part of an expression sub-tree that
23 **     does not match the current row. This comes up for MATCH queries
24 **     such as:
25 **
26 **         "a OR (b AND c)"
27 **
28 **     In FTS4, if a single row contains instances of tokens "a" and "c",
29 **     but not "b", all instances of "c" are considered matches. In FTS5,
30 **     they are not (as the "b AND c" sub-tree does not match the current
31 **     row).
32 **
33 **  2) For the values returned by 'x' that apply to all rows of the table,
34 **     NEAR constraints are not considered. But for the number of hits in
35 **     the current row, they are.
36 **
37 ** This file exports a single function that may be called to register the
38 ** matchinfo() implementation with a database handle:
39 **
40 **   int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db);
41 */
42 
43 
44 #ifdef SQLITE_ENABLE_FTS5
45 
46 #include "fts5.h"
47 #include <assert.h>
48 #include <string.h>
49 
50 typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;
51 
52 #ifndef SQLITE_AMALGAMATION
53 typedef unsigned int u32;
54 #endif
55 
/*
** State object used by the matchinfo() implementation. fts5MatchinfoNew()
** creates it as a single allocation: the structure itself, followed
** immediately by the aRet[] array, followed by the nul-terminated copy of
** the flag string that zArg points to.
*/
56 struct Fts5MatchinfoCtx {
57   int nCol;                       /* Number of cols in FTS5 table */
58   int nPhrase;                    /* Number of phrases in FTS5 query */
59   char *zArg;                     /* nul-term'd copy of 2nd arg */
60   int nRet;                       /* Number of elements in aRet[] */
61   u32 *aRet;                      /* Array of 32-bit unsigned ints to return */
62 };
63 
64 
65 
66 /*
67 ** Return a pointer to the fts5_api pointer for database connection db.
68 ** If an error occurs, return NULL and leave an error in the database
69 ** handle (accessible using sqlite3_errcode()/errmsg()).
70 */
fts5_api_from_db(sqlite3 * db,fts5_api ** ppApi)71 static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi){
72   sqlite3_stmt *pStmt = 0;
73   int rc;
74 
75   *ppApi = 0;
76   rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0);
77   if( rc==SQLITE_OK ){
78     sqlite3_bind_pointer(pStmt, 1, (void*)ppApi, "fts5_api_ptr", 0);
79     (void)sqlite3_step(pStmt);
80     rc = sqlite3_finalize(pStmt);
81   }
82 
83   return rc;
84 }
85 
86 
/*
** Return the number of 32-bit integers that matchinfo flag character f
** contributes to the output array, for a table with nCol columns and a
** query with nPhrase phrases. If f is not a recognized flag character,
** return -1.
*/
static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
  /* Flags that produce exactly one integer */
  if( f=='p' || f=='c' || f=='n' ) return 1;

  /* Flags that produce one integer per column */
  if( f=='a' || f=='l' || f=='s' ) return nCol;

  /* Flags that produce output for every (phrase, column) pair */
  if( f=='y' ) return nCol * nPhrase;
  if( f=='x' ) return 3 * nCol * nPhrase;

  /* One bit per column, rounded up to whole 32-bit words, for each phrase */
  if( f=='b' ) return ((nCol + 31) / 32) * nPhrase;

  return -1;
}
109 
/*
** Invoke callback x once for each flag character in p->zArg, in order.
** Each invocation is handed a pointer to the position within p->aRet[]
** where the output for that flag begins; the position advances by
** fts5MatchinfoFlagsize() integers after each successful call.
**
** Iteration stops at the first callback that returns something other than
** SQLITE_OK, and that value is returned. Otherwise SQLITE_OK is returned.
*/
static int fts5MatchinfoIter(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  Fts5MatchinfoCtx *p,
  int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
){
  const char *zFlag;              /* Current position in the flag string */
  u32 *aSlot = p->aRet;           /* Where output for *zFlag starts */

  for(zFlag=p->zArg; *zFlag; zFlag++){
    int rc = x(pApi, pFts, p, *zFlag, aSlot);
    if( rc!=SQLITE_OK ) return rc;
    aSlot += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, *zFlag);
  }
  return SQLITE_OK;
}
127 
/*
** Callback passed to xQueryPhrase() by the 'x' case of
** fts5MatchinfoGlobalCb(). pUserData points to an array of u32 values
** arranged in groups of three, one group per column.
**
** Every phrase instance reported by the xPhraseFirst()/xPhraseNext()
** iteration increments element 1 of the group for its column. Element 2 of
** that group is incremented each time the column differs from the column
** of the previously visited instance. Always returns SQLITE_OK.
*/
static int fts5MatchinfoXCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  void *pUserData
){
  u32 *aGroup = (u32*)pUserData;
  Fts5PhraseIter it;
  int iCol;
  int iOff;
  int iLastCol = -1;              /* Column of the previous instance */

  pApi->xPhraseFirst(pFts, 0, &it, &iCol, &iOff);
  while( iCol>=0 ){
    u32 *aCol = &aGroup[iCol*3];
    aCol[1]++;
    if( iCol!=iLastCol ){
      aCol[2]++;
      iLastCol = iCol;
    }
    pApi->xPhraseNext(pFts, &it, &iCol, &iOff);
  }

  return SQLITE_OK;
}
149 
/*
** fts5MatchinfoIter() callback that fills in the output for flag f that is
** computed once when the context object is created (it is invoked from
** fts5MatchinfoNew()). aOut points to the first output integer for f.
**
**   'p'  number of phrases (p->nPhrase)
**   'c'  number of columns (p->nCol)
**   'x'  elements 1 and 2 of each 3-integer group, accumulated by running
**        fts5MatchinfoXCb() via xQueryPhrase() for each phrase
**   'n'  row count reported by xRowCount()
**   'a'  for each column, xColumnTotalSize() divided by the row count,
**        rounded to nearest; all zeroes if the row count is zero
**
** Other flags are left untouched here. Returns SQLITE_OK or the error code
** returned by the failing FTS5 API call.
*/
static int fts5MatchinfoGlobalCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5MatchinfoCtx *p,
  char f,
  u32 *aOut
){
  int rc = SQLITE_OK;
  switch( f ){
    case 'p':
      aOut[0] = p->nPhrase;
      break;

    case 'c':
      aOut[0] = p->nCol;
      break;

    case 'x': {
      int i;
      for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
        /* Each phrase owns nCol groups of 3 integers */
        void *pPtr = (void*)&aOut[i * p->nCol * 3];
        rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
      }
      break;
    }

    case 'n': {
      /* Initialized so that a failing xRowCount() never causes an
      ** indeterminate value to be read below. */
      sqlite3_int64 nRow = 0;
      rc = pApi->xRowCount(pFts, &nRow);
      aOut[0] = (u32)nRow;
      break;
    }

    case 'a': {
      sqlite3_int64 nRow = 0;
      rc = pApi->xRowCount(pFts, &nRow);
      if( nRow==0 ){
        memset(aOut, 0, sizeof(u32) * p->nCol);
      }else{
        int i;
        for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
          sqlite3_int64 nToken;
          rc = pApi->xColumnTotalSize(pFts, i, &nToken);
          if( rc==SQLITE_OK){
            /* Integer division rounded to the nearest whole number */
            aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
          }
        }
      }
      break;
    }

    default:
      /* Flags handled by fts5MatchinfoLocalCb() only */
      break;
  }
  return rc;
}
204 
/*
** fts5MatchinfoIter() callback that fills in the output for flag f that is
** recomputed on every invocation of matchinfo() (it is invoked from
** fts5MatchinfoFunc()). aOut points to the first output integer for f.
**
**   'b'  for each phrase, a bitmask with one bit per column, set for each
**        column reported by xPhraseFirstColumn()/xPhraseNextColumn()
**   'x'  element 0 of each 3-integer (phrase, column) group: number of
**        instances reported by xPhraseFirst()/xPhraseNext()
**   'y'  same count as 'x', but one integer per (phrase, column) pair
**   'l'  for each column, the value reported by xColumnSize()
**   's'  for each column, the length of the longest run of instances in
**        which each instance belongs to the phrase following the previous
**        one and starts exactly where the previous one ends
**
** Other flags are left untouched here. Returns SQLITE_OK or the error code
** returned by the failing FTS5 API call.
*/
static int fts5MatchinfoLocalCb(
  const Fts5ExtensionApi *pApi,
  Fts5Context *pFts,
  Fts5MatchinfoCtx *p,
  char f,
  u32 *aOut
){
  int i;
  int rc = SQLITE_OK;

  switch( f ){
    case 'b': {
      int iPhrase;
      int nWord = (p->nCol + 31) / 32;    /* u32 words per phrase */
      int nInt = nWord * p->nPhrase;
      for(i=0; i<nInt; i++) aOut[i] = 0;

      for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
        Fts5PhraseIter iter;
        int iCol;
        for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
            iCol>=0;
            pApi->xPhraseNextColumn(pFts, &iter, &iCol)
        ){
          aOut[iPhrase * nWord + iCol/32] |= ((u32)1 << iCol%32);
        }
      }

      break;
    }

    case 'x':
    case 'y': {
      int nMul = (f=='x' ? 3 : 1);
      int iPhrase;

      /* Only the first integer of each group is reset; for 'x' the other
      ** two are not modified by this function. */
      for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;

      for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
        Fts5PhraseIter iter;
        int iOff, iCol;
        for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
            iOff>=0;
            pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
        ){
          aOut[nMul * (iCol + iPhrase * p->nCol)]++;
        }
      }

      break;
    }

    case 'l': {
      for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
        int nToken = 0;
        rc = pApi->xColumnSize(pFts, i, &nToken);
        /* Only store the size if the call actually produced one */
        if( rc==SQLITE_OK ) aOut[i] = (u32)nToken;
      }
      break;
    }

    case 's': {
      int nInst = 0;

      memset(aOut, 0, sizeof(u32) * p->nCol);

      rc = pApi->xInstCount(pFts, &nInst);
      for(i=0; rc==SQLITE_OK && i<nInst; i++){
        int iPhrase = 0, iOff = 0, iCol = 0;
        int iNextPhrase;
        int iNextOff;
        u32 nSeq = 1;
        int j;

        rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
        if( rc!=SQLITE_OK ) break;

        /* The run continues only with phrase iPhrase+1 starting right
        ** after this instance, so the offset advances by the size of
        ** phrase iPhrase (not of phrase 0). */
        iNextPhrase = iPhrase+1;
        iNextOff = iOff + pApi->xPhraseSize(pFts, iPhrase);
        for(j=i+1; j<nInst; j++){
          int ip = 0, ic = 0, io = 0;
          rc = pApi->xInst(pFts, j, &ip, &ic, &io);
          if( rc!=SQLITE_OK ) break;
          if( ic!=iCol || io>iNextOff ) break;
          if( ip==iNextPhrase && io==iNextOff ){
            nSeq++;
            iNextPhrase = ip+1;
            iNextOff = io + pApi->xPhraseSize(pFts, ip);
          }
        }
        if( rc!=SQLITE_OK ) break;

        if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq;
      }

      break;
    }

    default:
      /* Flags handled by fts5MatchinfoGlobalCb() only */
      break;
  }
  return rc;
}
300 
fts5MatchinfoNew(const Fts5ExtensionApi * pApi,Fts5Context * pFts,sqlite3_context * pCtx,const char * zArg)301 static Fts5MatchinfoCtx *fts5MatchinfoNew(
302   const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
303   Fts5Context *pFts,              /* First arg to pass to pApi functions */
304   sqlite3_context *pCtx,          /* Context for returning error message */
305   const char *zArg                /* Matchinfo flag string */
306 ){
307   Fts5MatchinfoCtx *p;
308   int nCol;
309   int nPhrase;
310   int i;
311   int nInt;
312   sqlite3_int64 nByte;
313   int rc;
314 
315   nCol = pApi->xColumnCount(pFts);
316   nPhrase = pApi->xPhraseCount(pFts);
317 
318   nInt = 0;
319   for(i=0; zArg[i]; i++){
320     int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]);
321     if( n<0 ){
322       char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]);
323       sqlite3_result_error(pCtx, zErr, -1);
324       sqlite3_free(zErr);
325       return 0;
326     }
327     nInt += n;
328   }
329 
330   nByte = sizeof(Fts5MatchinfoCtx)          /* The struct itself */
331          + sizeof(u32) * nInt               /* The p->aRet[] array */
332          + (i+1);                           /* The p->zArg string */
333   p = (Fts5MatchinfoCtx*)sqlite3_malloc64(nByte);
334   if( p==0 ){
335     sqlite3_result_error_nomem(pCtx);
336     return 0;
337   }
338   memset(p, 0, nByte);
339 
340   p->nCol = nCol;
341   p->nPhrase = nPhrase;
342   p->aRet = (u32*)&p[1];
343   p->nRet = nInt;
344   p->zArg = (char*)&p->aRet[nInt];
345   memcpy(p->zArg, zArg, i);
346 
347   rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb);
348   if( rc!=SQLITE_OK ){
349     sqlite3_result_error_code(pCtx, rc);
350     sqlite3_free(p);
351     p = 0;
352   }
353 
354   return p;
355 }
356 
/*
** Implementation of the matchinfo() FTS5 auxiliary function.
**
** The optional first trailing argument is the flag string. If it is
** omitted or is an SQL NULL, the default "pcx" is used.
**
** A Fts5MatchinfoCtx object is stored as auxiliary data via xSetAuxdata()
** and reused by later calls for as long as sqlite3_stricmp() reports the
** same flag string; otherwise a new one is built with fts5MatchinfoNew().
** On every call fts5MatchinfoLocalCb() is run for each flag, and the result
** of the function is a copy of the aRet[] array as a blob. On failure an
** error is left in pCtx instead.
*/
static void fts5MatchinfoFunc(
  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
  Fts5Context *pFts,              /* First arg to pass to pApi functions */
  sqlite3_context *pCtx,          /* Context for returning result/error */
  int nVal,                       /* Number of values in apVal[] array */
  sqlite3_value **apVal           /* Array of trailing arguments */
){
  const char *zArg;
  Fts5MatchinfoCtx *p;
  int rc = SQLITE_OK;

  if( nVal>0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
    zArg = (const char*)sqlite3_value_text(apVal[0]);
    if( zArg==0 ){
      /* A non-NULL value that cannot be rendered as text */
      sqlite3_result_error_nomem(pCtx);
      return;
    }
  }else{
    zArg = "pcx";
  }

  /* NOTE(review): the comparison below is case-insensitive although flag
  ** characters are matched case-sensitively elsewhere - confirm intended. */
  p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
    p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
    if( p==0 ){
      /* fts5MatchinfoNew() has already left the specific error in pCtx;
      ** do not overwrite its error code. */
      return;
    }
    rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
  }

  if( rc==SQLITE_OK ){
    rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
  }
  if( rc!=SQLITE_OK ){
    sqlite3_result_error_code(pCtx, rc);
  }else{
    /* No error has occurred, so return a copy of the array of integers. */
    int nByte = p->nRet * sizeof(u32);
    sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  }
}
395 
/*
** Register the matchinfo() auxiliary function with the FTS5 module
** belonging to database handle db.
**
** Returns the error code from fts5_api_from_db() if that call fails,
** SQLITE_ERROR if no fts5_api object was obtained or if its iVersion is
** less than 2, and otherwise the value returned by xCreateFunction().
*/
int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
  fts5_api *pFts5 = 0;            /* FTS5 API functions */
  int rc;                         /* Return code */

  /* Extract the FTS5 API pointer from the database handle. */
  rc = fts5_api_from_db(db, &pFts5);
  if( rc!=SQLITE_OK ) return rc;

  /* A NULL pointer here means that no fts5_api object was made available
  ** for this database handle. */
  if( pFts5==0 ) return SQLITE_ERROR;

  /* The fts5_api object must be version 2 or newer. */
  if( pFts5->iVersion<2 ) return SQLITE_ERROR;

  /* Register the implementation of matchinfo() */
  return pFts5->xCreateFunction(pFts5, "matchinfo", 0, fts5MatchinfoFunc, 0);
}
420 
421 #endif /* SQLITE_ENABLE_FTS5 */
422