xref: /sqlite-3.40.0/ext/fts5/fts5_buffer.c (revision 33cf1942)
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 */
13 
14 
15 
16 #include "fts5Int.h"
17 
sqlite3Fts5BufferSize(int * pRc,Fts5Buffer * pBuf,u32 nByte)18 int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
19   if( (u32)pBuf->nSpace<nByte ){
20     u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
21     u8 *pNew;
22     while( nNew<nByte ){
23       nNew = nNew * 2;
24     }
25     pNew = sqlite3_realloc64(pBuf->p, nNew);
26     if( pNew==0 ){
27       *pRc = SQLITE_NOMEM;
28       return 1;
29     }else{
30       pBuf->nSpace = (int)nNew;
31       pBuf->p = pNew;
32     }
33   }
34   return 0;
35 }
36 
37 
38 /*
39 ** Encode value iVal as an SQLite varint and append it to the buffer object
40 ** pBuf. If an OOM error occurs, set the error code in p.
41 */
sqlite3Fts5BufferAppendVarint(int * pRc,Fts5Buffer * pBuf,i64 iVal)42 void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
43   if( fts5BufferGrow(pRc, pBuf, 9) ) return;
44   pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
45 }
46 
sqlite3Fts5Put32(u8 * aBuf,int iVal)47 void sqlite3Fts5Put32(u8 *aBuf, int iVal){
48   aBuf[0] = (iVal>>24) & 0x00FF;
49   aBuf[1] = (iVal>>16) & 0x00FF;
50   aBuf[2] = (iVal>> 8) & 0x00FF;
51   aBuf[3] = (iVal>> 0) & 0x00FF;
52 }
53 
sqlite3Fts5Get32(const u8 * aBuf)54 int sqlite3Fts5Get32(const u8 *aBuf){
55   return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]);
56 }
57 
58 /*
59 ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
60 ** the error code in p. If an error has already occurred when this function
61 ** is called, it is a no-op.
62 */
sqlite3Fts5BufferAppendBlob(int * pRc,Fts5Buffer * pBuf,u32 nData,const u8 * pData)63 void sqlite3Fts5BufferAppendBlob(
64   int *pRc,
65   Fts5Buffer *pBuf,
66   u32 nData,
67   const u8 *pData
68 ){
69   if( nData ){
70     if( fts5BufferGrow(pRc, pBuf, nData) ) return;
71     memcpy(&pBuf->p[pBuf->n], pData, nData);
72     pBuf->n += nData;
73   }
74 }
75 
76 /*
77 ** Append the nul-terminated string zStr to the buffer pBuf. This function
78 ** ensures that the byte following the buffer data is set to 0x00, even
79 ** though this byte is not included in the pBuf->n count.
80 */
sqlite3Fts5BufferAppendString(int * pRc,Fts5Buffer * pBuf,const char * zStr)81 void sqlite3Fts5BufferAppendString(
82   int *pRc,
83   Fts5Buffer *pBuf,
84   const char *zStr
85 ){
86   int nStr = (int)strlen(zStr);
87   sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
88   pBuf->n--;
89 }
90 
91 /*
92 ** Argument zFmt is a printf() style format string. This function performs
93 ** the printf() style processing, then appends the results to buffer pBuf.
94 **
95 ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
96 ** following the buffer data is set to 0x00, even though this byte is not
97 ** included in the pBuf->n count.
98 */
sqlite3Fts5BufferAppendPrintf(int * pRc,Fts5Buffer * pBuf,char * zFmt,...)99 void sqlite3Fts5BufferAppendPrintf(
100   int *pRc,
101   Fts5Buffer *pBuf,
102   char *zFmt, ...
103 ){
104   if( *pRc==SQLITE_OK ){
105     char *zTmp;
106     va_list ap;
107     va_start(ap, zFmt);
108     zTmp = sqlite3_vmprintf(zFmt, ap);
109     va_end(ap);
110 
111     if( zTmp==0 ){
112       *pRc = SQLITE_NOMEM;
113     }else{
114       sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
115       sqlite3_free(zTmp);
116     }
117   }
118 }
119 
sqlite3Fts5Mprintf(int * pRc,const char * zFmt,...)120 char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
121   char *zRet = 0;
122   if( *pRc==SQLITE_OK ){
123     va_list ap;
124     va_start(ap, zFmt);
125     zRet = sqlite3_vmprintf(zFmt, ap);
126     va_end(ap);
127     if( zRet==0 ){
128       *pRc = SQLITE_NOMEM;
129     }
130   }
131   return zRet;
132 }
133 
134 
135 /*
136 ** Free any buffer allocated by pBuf. Zero the structure before returning.
137 */
sqlite3Fts5BufferFree(Fts5Buffer * pBuf)138 void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
139   sqlite3_free(pBuf->p);
140   memset(pBuf, 0, sizeof(Fts5Buffer));
141 }
142 
143 /*
144 ** Zero the contents of the buffer object. But do not free the associated
145 ** memory allocation.
146 */
sqlite3Fts5BufferZero(Fts5Buffer * pBuf)147 void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
148   pBuf->n = 0;
149 }
150 
151 /*
152 ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
153 ** the error code in p. If an error has already occurred when this function
154 ** is called, it is a no-op.
155 */
sqlite3Fts5BufferSet(int * pRc,Fts5Buffer * pBuf,int nData,const u8 * pData)156 void sqlite3Fts5BufferSet(
157   int *pRc,
158   Fts5Buffer *pBuf,
159   int nData,
160   const u8 *pData
161 ){
162   pBuf->n = 0;
163   sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
164 }
165 
sqlite3Fts5PoslistNext64(const u8 * a,int n,int * pi,i64 * piOff)166 int sqlite3Fts5PoslistNext64(
167   const u8 *a, int n,             /* Buffer containing poslist */
168   int *pi,                        /* IN/OUT: Offset within a[] */
169   i64 *piOff                      /* IN/OUT: Current offset */
170 ){
171   int i = *pi;
172   if( i>=n ){
173     /* EOF */
174     *piOff = -1;
175     return 1;
176   }else{
177     i64 iOff = *piOff;
178     u32 iVal;
179     fts5FastGetVarint32(a, i, iVal);
180     if( iVal<=1 ){
181       if( iVal==0 ){
182         *pi = i;
183         return 0;
184       }
185       fts5FastGetVarint32(a, i, iVal);
186       iOff = ((i64)iVal) << 32;
187       assert( iOff>=0 );
188       fts5FastGetVarint32(a, i, iVal);
189       if( iVal<2 ){
190         /* This is a corrupt record. So stop parsing it here. */
191         *piOff = -1;
192         return 1;
193       }
194       *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
195     }else{
196       *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF);
197     }
198     *pi = i;
199     assert_nc( *piOff>=iOff );
200     return 0;
201   }
202 }
203 
204 
205 /*
206 ** Advance the iterator object passed as the only argument. Return true
207 ** if the iterator reaches EOF, or false otherwise.
208 */
sqlite3Fts5PoslistReaderNext(Fts5PoslistReader * pIter)209 int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
210   if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
211     pIter->bEof = 1;
212   }
213   return pIter->bEof;
214 }
215 
sqlite3Fts5PoslistReaderInit(const u8 * a,int n,Fts5PoslistReader * pIter)216 int sqlite3Fts5PoslistReaderInit(
217   const u8 *a, int n,             /* Poslist buffer to iterate through */
218   Fts5PoslistReader *pIter        /* Iterator object to initialize */
219 ){
220   memset(pIter, 0, sizeof(*pIter));
221   pIter->a = a;
222   pIter->n = n;
223   sqlite3Fts5PoslistReaderNext(pIter);
224   return pIter->bEof;
225 }
226 
227 /*
228 ** Append position iPos to the position list being accumulated in buffer
229 ** pBuf, which must be already be large enough to hold the new data.
230 ** The previous position written to this list is *piPrev. *piPrev is set
231 ** to iPos before returning.
232 */
sqlite3Fts5PoslistSafeAppend(Fts5Buffer * pBuf,i64 * piPrev,i64 iPos)233 void sqlite3Fts5PoslistSafeAppend(
234   Fts5Buffer *pBuf,
235   i64 *piPrev,
236   i64 iPos
237 ){
238   if( iPos>=*piPrev ){
239     static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
240     if( (iPos & colmask) != (*piPrev & colmask) ){
241       pBuf->p[pBuf->n++] = 1;
242       pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
243       *piPrev = (iPos & colmask);
244     }
245     pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
246     *piPrev = iPos;
247   }
248 }
249 
sqlite3Fts5PoslistWriterAppend(Fts5Buffer * pBuf,Fts5PoslistWriter * pWriter,i64 iPos)250 int sqlite3Fts5PoslistWriterAppend(
251   Fts5Buffer *pBuf,
252   Fts5PoslistWriter *pWriter,
253   i64 iPos
254 ){
255   int rc = 0;   /* Initialized only to suppress erroneous warning from Clang */
256   if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc;
257   sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
258   return SQLITE_OK;
259 }
260 
sqlite3Fts5MallocZero(int * pRc,sqlite3_int64 nByte)261 void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){
262   void *pRet = 0;
263   if( *pRc==SQLITE_OK ){
264     pRet = sqlite3_malloc64(nByte);
265     if( pRet==0 ){
266       if( nByte>0 ) *pRc = SQLITE_NOMEM;
267     }else{
268       memset(pRet, 0, (size_t)nByte);
269     }
270   }
271   return pRet;
272 }
273 
274 /*
275 ** Return a nul-terminated copy of the string indicated by pIn. If nIn
276 ** is non-negative, then it is the length of the string in bytes. Otherwise,
277 ** the length of the string is determined using strlen().
278 **
279 ** It is the responsibility of the caller to eventually free the returned
280 ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
281 */
sqlite3Fts5Strndup(int * pRc,const char * pIn,int nIn)282 char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
283   char *zRet = 0;
284   if( *pRc==SQLITE_OK ){
285     if( nIn<0 ){
286       nIn = (int)strlen(pIn);
287     }
288     zRet = (char*)sqlite3_malloc(nIn+1);
289     if( zRet ){
290       memcpy(zRet, pIn, nIn);
291       zRet[nIn] = '\0';
292     }else{
293       *pRc = SQLITE_NOMEM;
294     }
295   }
296   return zRet;
297 }
298 
299 
300 /*
301 ** Return true if character 't' may be part of an FTS5 bareword, or false
302 ** otherwise. Characters that may be part of barewords:
303 **
304 **   * All non-ASCII characters,
305 **   * The 52 upper and lower case ASCII characters, and
306 **   * The 10 integer ASCII characters.
307 **   * The underscore character "_" (0x5F).
308 **   * The unicode "subsitute" character (0x1A).
309 */
sqlite3Fts5IsBareword(char t)310 int sqlite3Fts5IsBareword(char t){
311   u8 aBareword[128] = {
312     0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
313     0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
314     0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
315     1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
316     0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
317     1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
318     0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
319     1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
320   };
321 
322   return (t & 0x80) || aBareword[(int)t];
323 }
324 
325 
326 /*************************************************************************
327 */
328 typedef struct Fts5TermsetEntry Fts5TermsetEntry;
329 struct Fts5TermsetEntry {
330   char *pTerm;
331   int nTerm;
332   int iIdx;                       /* Index (main or aPrefix[] entry) */
333   Fts5TermsetEntry *pNext;
334 };
335 
336 struct Fts5Termset {
337   Fts5TermsetEntry *apHash[512];
338 };
339 
sqlite3Fts5TermsetNew(Fts5Termset ** pp)340 int sqlite3Fts5TermsetNew(Fts5Termset **pp){
341   int rc = SQLITE_OK;
342   *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
343   return rc;
344 }
345 
sqlite3Fts5TermsetAdd(Fts5Termset * p,int iIdx,const char * pTerm,int nTerm,int * pbPresent)346 int sqlite3Fts5TermsetAdd(
347   Fts5Termset *p,
348   int iIdx,
349   const char *pTerm, int nTerm,
350   int *pbPresent
351 ){
352   int rc = SQLITE_OK;
353   *pbPresent = 0;
354   if( p ){
355     int i;
356     u32 hash = 13;
357     Fts5TermsetEntry *pEntry;
358 
359     /* Calculate a hash value for this term. This is the same hash checksum
360     ** used by the fts5_hash.c module. This is not important for correct
361     ** operation of the module, but is necessary to ensure that some tests
362     ** designed to produce hash table collisions really do work.  */
363     for(i=nTerm-1; i>=0; i--){
364       hash = (hash << 3) ^ hash ^ pTerm[i];
365     }
366     hash = (hash << 3) ^ hash ^ iIdx;
367     hash = hash % ArraySize(p->apHash);
368 
369     for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
370       if( pEntry->iIdx==iIdx
371           && pEntry->nTerm==nTerm
372           && memcmp(pEntry->pTerm, pTerm, nTerm)==0
373       ){
374         *pbPresent = 1;
375         break;
376       }
377     }
378 
379     if( pEntry==0 ){
380       pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
381       if( pEntry ){
382         pEntry->pTerm = (char*)&pEntry[1];
383         pEntry->nTerm = nTerm;
384         pEntry->iIdx = iIdx;
385         memcpy(pEntry->pTerm, pTerm, nTerm);
386         pEntry->pNext = p->apHash[hash];
387         p->apHash[hash] = pEntry;
388       }
389     }
390   }
391 
392   return rc;
393 }
394 
sqlite3Fts5TermsetFree(Fts5Termset * p)395 void sqlite3Fts5TermsetFree(Fts5Termset *p){
396   if( p ){
397     u32 i;
398     for(i=0; i<ArraySize(p->apHash); i++){
399       Fts5TermsetEntry *pEntry = p->apHash[i];
400       while( pEntry ){
401         Fts5TermsetEntry *pDel = pEntry;
402         pEntry = pEntry->pNext;
403         sqlite3_free(pDel);
404       }
405     }
406     sqlite3_free(p);
407   }
408 }
409