xref: /sqlite-3.40.0/ext/fts5/fts5_index.c (revision 838865c0)
1 /*
2 ** 2014 May 31
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** Low level access to the FTS index stored in the database file. The
** routines in this file implement all read and write access to the
15 ** %_data table. Other parts of the system access this functionality via
16 ** the interface defined in fts5Int.h.
17 */
18 
19 
20 #include "fts5Int.h"
21 
22 /*
23 ** Overview:
24 **
25 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
26 ** As well as the main term index, there may be up to 31 prefix indexes.
27 ** The format is similar to FTS3/4, except that:
28 **
29 **   * all segment b-tree leaf data is stored in fixed size page records
30 **     (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
31 **     taken to ensure it is possible to iterate in either direction through
32 **     the entries in a doclist, or to seek to a specific entry within a
33 **     doclist, without loading it into memory.
34 **
35 **   * large doclists that span many pages have associated "doclist index"
36 **     records that contain a copy of the first rowid on each page spanned by
37 **     the doclist. This is used to speed up seek operations, and merges of
38 **     large doclists with very small doclists.
39 **
40 **   * extra fields in the "structure record" record the state of ongoing
41 **     incremental merge operations.
42 **
43 */
44 
45 
/* Sizes of the units in which merge/optimize work is carried out. */
#define FTS5_OPT_WORK_UNIT  1000  /* Leaf pages handled per optimize step */
#define FTS5_WORK_UNIT      64    /* Leaf pages making up one unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add a dlidx once this many empty pages */

#define FTS5_MAIN_PREFIX '0'

/* The overview above allows for at most 31 prefix indexes. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
56 
57 /*
58 ** Details:
59 **
60 ** The %_data table managed by this module,
61 **
62 **     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
63 **
64 ** , contains the following 5 types of records. See the comments surrounding
65 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
67 **
68 ** 1. Structure Records:
69 **
70 **   The set of segments that make up an index - the index structure - are
71 **   recorded in a single record within the %_data table. The record consists
72 **   of a single 32-bit configuration cookie value followed by a list of
73 **   SQLite varints. If the FTS table features more than one index (because
74 **   there are one or more prefix indexes), it is guaranteed that all share
75 **   the same cookie value.
76 **
77 **   Immediately following the configuration cookie, the record begins with
78 **   three varints:
79 **
80 **     + number of levels,
81 **     + total number of segments on all levels,
82 **     + value of write counter.
83 **
84 **   Then, for each level from 0 to nMax:
85 **
86 **     + number of input segments in ongoing merge.
87 **     + total number of segments in level.
88 **     + for each segment from oldest to newest:
89 **         + segment id (always > 0)
90 **         + first leaf page number (often 1, always greater than 0)
91 **         + final leaf page number
92 **
93 ** 2. The Averages Record:
94 **
95 **   A single record within the %_data table. The data is a list of varints.
96 **   The first value is the number of rows in the index. Then, for each column
97 **   from left to right, the total number of tokens in the column for all
98 **   rows of the table.
99 **
100 ** 3. Segment leaves:
101 **
102 **   TERM/DOCLIST FORMAT:
103 **
104 **     Most of each segment leaf is taken up by term/doclist data. The
105 **     general format of term/doclist, starting with the first term
106 **     on the leaf page, is:
107 **
108 **         varint : size of first term
109 **         blob:    first term data
110 **         doclist: first doclist
111 **         zero-or-more {
112 **           varint:  number of bytes in common with previous term
113 **           varint:  number of bytes of new term data (nNew)
114 **           blob:    nNew bytes of new term data
115 **           doclist: next doclist
116 **         }
117 **
118 **     doclist format:
119 **
120 **         varint:  first rowid
121 **         poslist: first poslist
122 **         zero-or-more {
123 **           varint:  rowid delta (always > 0)
124 **           poslist: next poslist
125 **         }
126 **
127 **     poslist format:
128 **
129 **         varint: size of poslist in bytes multiplied by 2, not including
130 **                 this field. Plus 1 if this entry carries the "delete" flag.
131 **         collist: collist for column 0
132 **         zero-or-more {
133 **           0x01 byte
134 **           varint: column number (I)
135 **           collist: collist for column I
136 **         }
137 **
138 **     collist format:
139 **
140 **         varint: first offset + 2
141 **         zero-or-more {
142 **           varint: offset delta + 2
143 **         }
144 **
145 **   PAGE FORMAT
146 **
147 **     Each leaf page begins with a 4-byte header containing 2 16-bit
148 **     unsigned integer fields in big-endian format. They are:
149 **
150 **       * The byte offset of the first rowid on the page, if it exists
151 **         and occurs before the first term (otherwise 0).
152 **
153 **       * The byte offset of the start of the page footer. If the page
154 **         footer is 0 bytes in size, then this field is the same as the
155 **         size of the leaf page in bytes.
156 **
157 **     The page footer consists of a single varint for each term located
158 **     on the page. Each varint is the byte offset of the current term
159 **     within the page, delta-compressed against the previous value. In
160 **     other words, the first varint in the footer is the byte offset of
161 **     the first term, the second is the byte offset of the second less that
162 **     of the first, and so on.
163 **
164 **     The term/doclist format described above is accurate if the entire
165 **     term/doclist data fits on a single leaf page. If this is not the case,
166 **     the format is changed in two ways:
167 **
168 **       + if the first rowid on a page occurs before the first term, it
169 **         is stored as a literal value:
170 **
171 **             varint:  first rowid
172 **
173 **       + the first term on each page is stored in the same way as the
174 **         very first term of the segment:
175 **
176 **             varint : size of first term
177 **             blob:    first term data
178 **
179 ** 5. Segment doclist indexes:
180 **
181 **   Doclist indexes are themselves b-trees, however they usually consist of
182 **   a single leaf record only. The format of each doclist index leaf page
183 **   is:
184 **
185 **     * Flags byte. Bits are:
186 **         0x01: Clear if leaf is also the root page, otherwise set.
187 **
188 **     * Page number of fts index leaf page. As a varint.
189 **
190 **     * First rowid on page indicated by previous field. As a varint.
191 **
192 **     * A list of varints, one for each subsequent termless page. A
193 **       positive delta if the termless page contains at least one rowid,
194 **       or an 0x00 byte otherwise.
195 **
196 **   Internal doclist index nodes are:
197 **
198 **     * Flags byte. Bits are:
199 **         0x01: Clear for root page, otherwise set.
200 **
201 **     * Page number of first child page. As a varint.
202 **
203 **     * Copy of first rowid on page indicated by previous field. As a varint.
204 **
205 **     * A list of delta-encoded varints - the first rowid on each subsequent
206 **       child page.
207 **
208 */
209 
/*
** Fixed %_data rowids at which the averages record and the structure
** record are stored.
*/
#define FTS5_AVERAGES_ROWID     1    /* %_data rowid of the averages record */
#define FTS5_STRUCTURE_ROWID   10    /* %_data rowid of the structure record */
215 
/*
** Macros that compute the %_data rowids of segment leaves and of
** doclist-index (dlidx) leaves and nodes. All of these records are stored
** with large positive rowids.
**
** Every segment has a unique, non-zero, 16-bit id.
**
** A rowid is assembled from four bit-fields. From most to least
** significant they are: segment id, doclist-index flag, dlidx tree height
** and page number. The width of each field is given by the FTS5_DATA_*_B
** constants.
**
** The rowid of a segment leaf is obtained by passing the segment id and
** the leaf page number to FTS5_SEGMENT_ROWID(). Leaves are numbered
** sequentially, starting from 1.
*/
#define FTS5_DATA_ID_B     16     /* 16 bits: max segment id is 65535 */
#define FTS5_DATA_DLI_B     1     /* 1 bit: doclist-index flag */
#define FTS5_DATA_HEIGHT_B  5     /* 5 bits: dlidx tree height (0..31) */
#define FTS5_DATA_PAGE_B   31     /* 31 bits: max page number 2147483647 */

#define fts5_dri(segid, dlidx, height, pgno) (                                 \
 ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
 ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
 ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
 ((i64)(pgno))                                                                 \
)

/* Rowid of leaf page pgno of segment segid (dlidx flag and height are 0). */
#define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)

/* Rowid of a doclist-index record (dlidx flag set) at the given height. */
#define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
241 
#ifdef SQLITE_DEBUG
/*
** Return SQLITE_CORRUPT_VTAB. This function is only compiled in
** SQLITE_DEBUG builds.
**
** NOTE(review): presumably this exists so that a debugger breakpoint can
** be set at the point where index corruption is first reported - confirm
** against the definition of FTS5_CORRUPT.
**
** The parameter list is spelled (void) so that the definition is a proper
** prototype rather than an old-style declaration with unspecified
** arguments.
*/
int sqlite3Fts5Corrupt(void){ return SQLITE_CORRUPT_VTAB; }
#endif
245 
246 
/*
** Padding applied to blobs read from the %_data table. The intent is that
** each blob is followed by zero bytes, so that the various record formats
** can be decoded without overreading even if a record is corrupt.
**
** fts5DataRead() allocates FTS5_DATA_PADDING spare bytes after the record
** data.
**
** NOTE(review): FTS5_DATA_ZERO_PADDING is not referenced in the part of
** the file visible here - confirm where it is used.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20
254 
/* Forward declarations of the object types private to this file. */

/* Records read from %_data and the in-memory index structure */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

/* Iterators */
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;

/* Writers */
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
267 
268 struct Fts5Data {
269   u8 *p;                          /* Pointer to buffer containing record */
270   int nn;                         /* Size of record in bytes */
271   int szLeaf;                     /* Size of leaf without page-index */
272 };
273 
274 /*
275 ** One object per %_data table.
276 */
277 struct Fts5Index {
278   Fts5Config *pConfig;            /* Virtual table configuration */
279   char *zDataTbl;                 /* Name of %_data table */
280   int nWorkUnit;                  /* Leaf pages in a "unit" of work */
281 
282   /*
283   ** Variables related to the accumulation of tokens and doclists within the
284   ** in-memory hash tables before they are flushed to disk.
285   */
286   Fts5Hash *pHash;                /* Hash table for in-memory data */
287   int nPendingData;               /* Current bytes of pending data */
288   i64 iWriteRowid;                /* Rowid for current doc being written */
289   int bDelete;                    /* Current write is a delete */
290 
291   /* Error state. */
292   int rc;                         /* Current error code */
293 
294   /* State used by the fts5DataXXX() functions. */
295   sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
296   sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
297   sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
298   sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
299   sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
300   sqlite3_stmt *pIdxSelect;
301   int nRead;                      /* Total number of blocks read */
302 
303   sqlite3_stmt *pDataVersion;
304   i64 iStructVersion;             /* data_version when pStruct read */
305   Fts5Structure *pStruct;         /* Current db structure (or NULL) */
306 };
307 
308 struct Fts5DoclistIter {
309   u8 *aEof;                       /* Pointer to 1 byte past end of doclist */
310 
311   /* Output variables. aPoslist==0 at EOF */
312   i64 iRowid;
313   u8 *aPoslist;
314   int nPoslist;
315   int nSize;
316 };
317 
318 /*
319 ** The contents of the "structure" record for each index are represented
320 ** using an Fts5Structure record in memory. Which uses instances of the
321 ** other Fts5StructureXXX types as components.
322 */
/* In-memory description of one segment listed in the structure record. */
struct Fts5StructureSegment {
  int iSegid;                     /* Id of the segment */
  int pgnoFirst;                  /* Page number of the first leaf */
  int pgnoLast;                   /* Page number of the last leaf */
};
328 struct Fts5StructureLevel {
329   int nMerge;                     /* Number of segments in incr-merge */
330   int nSeg;                       /* Total number of segments on level */
331   Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
332 };
333 struct Fts5Structure {
334   int nRef;                       /* Object reference count */
335   u64 nWriteCounter;              /* Total leaves written to level 0 */
336   int nSegment;                   /* Total segments in this structure */
337   int nLevel;                     /* Number of levels in this index */
338   Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
339 };
340 
341 /*
342 ** An object of type Fts5SegWriter is used to write to segments.
343 */
344 struct Fts5PageWriter {
345   int pgno;                       /* Page number for this page */
346   int iPrevPgidx;                 /* Previous value written into pgidx */
347   Fts5Buffer buf;                 /* Buffer containing leaf data */
348   Fts5Buffer pgidx;               /* Buffer containing page-index */
349   Fts5Buffer term;                /* Buffer containing previous term on page */
350 };
351 struct Fts5DlidxWriter {
352   int pgno;                       /* Page number for this page */
353   int bPrevValid;                 /* True if iPrev is valid */
354   i64 iPrev;                      /* Previous rowid value written to page */
355   Fts5Buffer buf;                 /* Buffer containing page data */
356 };
357 struct Fts5SegWriter {
358   int iSegid;                     /* Segid to write to */
359   Fts5PageWriter writer;          /* PageWriter object */
360   i64 iPrevRowid;                 /* Previous rowid written to current leaf */
361   u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
362   u8 bFirstRowidInPage;           /* True if next rowid is first in page */
363   /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
364   u8 bFirstTermInPage;            /* True if next term will be first in leaf */
365   int nLeafWritten;               /* Number of leaf pages written */
366   int nEmpty;                     /* Number of contiguous term-less nodes */
367 
368   int nDlidx;                     /* Allocated size of aDlidx[] array */
369   Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
370 
371   /* Values to insert into the %_idx table */
372   Fts5Buffer btterm;              /* Next term to insert into %_idx table */
373   int iBtPage;                    /* Page number corresponding to btterm */
374 };
375 
376 typedef struct Fts5CResult Fts5CResult;
377 struct Fts5CResult {
378   u16 iFirst;                     /* aSeg[] index of firstest iterator */
379   u8 bTermEq;                     /* True if the terms are equal */
380 };
381 
382 /*
383 ** Object for iterating through a single segment, visiting each term/rowid
384 ** pair in the segment.
385 **
386 ** pSeg:
387 **   The segment to iterate through.
388 **
389 ** iLeafPgno:
390 **   Current leaf page number within segment.
391 **
392 ** iLeafOffset:
**   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
**
**   NOTE(review): the rest of this entry appears to be left over from an
**   earlier description and begins mid-sentence; confirm whether it still
**   applies: "rowid field of the current entry. Usually this is the size
**   field of the position list data. The exception is if the rowid for
**   the current entry is the last thing on the leaf page."
398 **
399 ** pLeaf:
400 **   Buffer containing current leaf page data. Set to NULL at EOF.
401 **
402 ** iTermLeafPgno, iTermLeafOffset:
403 **   Leaf page number containing the last term read from the segment. And
404 **   the offset immediately following the term data.
405 **
406 ** flags:
407 **   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
408 **
409 **   FTS5_SEGITER_ONETERM:
410 **     If set, set the iterator to point to EOF after the current doclist
411 **     has been exhausted. Do not proceed to the next term in the segment.
412 **
413 **   FTS5_SEGITER_REVERSE:
414 **     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
415 **     it is set, iterate through rowid in descending order instead of the
416 **     default ascending order.
417 **
418 ** iRowidOffset/nRowidOffset/aRowidOffset:
419 **     These are used if the FTS5_SEGITER_REVERSE flag is set.
420 **
421 **     For each rowid on the page corresponding to the current term, the
422 **     corresponding aRowidOffset[] entry is set to the byte offset of the
423 **     start of the "position-list-size" field within the page.
424 **
425 ** iTermIdx:
426 **     Index of current term on iTermLeafPgno.
427 */
428 struct Fts5SegIter {
429   Fts5StructureSegment *pSeg;     /* Segment to iterate through */
430   int flags;                      /* Mask of configuration flags */
431   int iLeafPgno;                  /* Current leaf page number */
432   Fts5Data *pLeaf;                /* Current leaf data */
433   Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
434   i64 iLeafOffset;                /* Byte offset within current leaf */
435 
436   /* Next method */
437   void (*xNext)(Fts5Index*, Fts5SegIter*, int*);
438 
439   /* The page and offset from which the current term was read. The offset
440   ** is the offset of the first rowid in the current doclist.  */
441   int iTermLeafPgno;
442   int iTermLeafOffset;
443 
444   int iPgidxOff;                  /* Next offset in pgidx */
445   int iEndofDoclist;
446 
447   /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
448   int iRowidOffset;               /* Current entry in aRowidOffset[] */
449   int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
450   int *aRowidOffset;              /* Array of offset to rowid fields */
451 
452   Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */
453 
454   /* Variables populated based on current entry. */
455   Fts5Buffer term;                /* Current term */
456   i64 iRowid;                     /* Current rowid */
457   int nPos;                       /* Number of bytes in current position list */
458   u8 bDel;                        /* True if the delete flag is set */
459 };
460 
/*
** The argument is a pointer to an Fts5Data object that holds a leaf page.
** Assert that its szLeaf field is either equal to the record size (nn) or
** equal to the 16-bit value stored at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)
468 
/* Bits that may be set in Fts5SegIter.flags. */
#define FTS5_SEGITER_ONETERM 0x01   /* Stop at the end of current doclist */
#define FTS5_SEGITER_REVERSE 0x02   /* Visit rowids in descending order */
471 
/*
** For each macro below, argument x is a pointer to an Fts5Data object
** that holds a leaf page.
**
** fts5LeafIsTermless(x):
**   True if the leaf contains no terms, false if it contains at least one.
**
** fts5LeafTermOff(x, i):
**   The 16-bit value read from byte offset (szLeaf + i*2) of the page.
**
** fts5LeafFirstRowidOff(x):
**   The 16-bit value stored in the first two bytes of the page - the
**   offset of the first rowid according to the page format notes above.
*/
#define fts5LeafIsTermless(x) ((x)->nn <= (x)->szLeaf)

#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))

#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
482 
483 /*
484 ** Object for iterating through the merged results of one or more segments,
485 ** visiting each term/rowid pair in the merged data.
486 **
487 ** nSeg is always a power of two greater than or equal to the number of
488 ** segments that this object is merging data from. Both the aSeg[] and
489 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
490 ** with zeroed objects - these are handled as if they were iterators opened
491 ** on empty segments.
492 **
493 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
494 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
495 ** comparison in this context is the index of the iterator that currently
496 ** points to the smaller term/rowid combination. Iterators at EOF are
497 ** considered to be greater than all other iterators.
498 **
499 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
500 ** the smallest key overall. aFirst[0] is unused.
501 **
502 ** poslist:
503 **   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
504 **   There is no way to tell if this is populated or not.
505 */
506 struct Fts5Iter {
507   Fts5IndexIter base;             /* Base class containing output vars */
508 
509   Fts5Index *pIndex;              /* Index that owns this iterator */
510   Fts5Buffer poslist;             /* Buffer containing current poslist */
511   Fts5Colset *pColset;            /* Restrict matches to these columns */
512 
513   /* Invoked to set output variables. */
514   void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);
515 
516   int nSeg;                       /* Size of aSeg[] array */
517   int bRev;                       /* True to iterate in reverse order */
518   u8 bSkipEmpty;                  /* True to skip deleted entries */
519 
520   i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
521   Fts5CResult *aFirst;            /* Current merge state (see above) */
522   Fts5SegIter aSeg[1];            /* Array of segment iterators */
523 };
524 
525 
526 /*
527 ** An instance of the following type is used to iterate through the contents
528 ** of a doclist-index record.
529 **
530 ** pData:
531 **   Record containing the doclist-index data.
532 **
533 ** bEof:
534 **   Set to true once iterator has reached EOF.
535 **
536 ** iOff:
537 **   Set to the current offset within record pData.
538 */
539 struct Fts5DlidxLvl {
540   Fts5Data *pData;              /* Data for current page of this level */
541   int iOff;                     /* Current offset into pData */
542   int bEof;                     /* At EOF already */
543   int iFirstOff;                /* Used by reverse iterators */
544 
545   /* Output variables */
546   int iLeafPgno;                /* Page number of current leaf page */
547   i64 iRowid;                   /* First rowid on leaf iLeafPgno */
548 };
549 struct Fts5DlidxIter {
550   int nLvl;
551   int iSegid;
552   Fts5DlidxLvl aLvl[1];
553 };
554 
/*
** Write iVal to aOut[0] and aOut[1] as a big-endian 16-bit integer
** (most significant byte first).
*/
static void fts5PutU16(u8 *aOut, u16 iVal){
  u8 hi = (u8)(iVal>>8);
  u8 lo = (u8)(iVal&0xFF);
  aOut[0] = hi;
  aOut[1] = lo;
}
559 
fts5GetU16(const u8 * aIn)560 static u16 fts5GetU16(const u8 *aIn){
561   return ((u16)aIn[0] << 8) + aIn[1];
562 }
563 
564 /*
565 ** Allocate and return a buffer at least nByte bytes in size.
566 **
567 ** If an OOM error is encountered, return NULL and set the error code in
568 ** the Fts5Index handle passed as the first argument.
569 */
fts5IdxMalloc(Fts5Index * p,sqlite3_int64 nByte)570 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
571   return sqlite3Fts5MallocZero(&p->rc, nByte);
572 }
573 
/*
** Compare the contents of the pLeft buffer with the pRight/nRight blob.
** The first MIN(pLeft->n, nRight) bytes are compared using memcmp(). If
** they are identical, the shorter of the two is considered the lesser.
**
** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
** +ve if pRight is smaller than pLeft. In other words:
**
**     res = *pLeft - *pRight
**
** This function is only compiled in SQLITE_DEBUG builds.
*/
#ifdef SQLITE_DEBUG
static int fts5BufferCompareBlob(
  Fts5Buffer *pLeft,              /* Left hand side of comparison */
  const u8 *pRight, int nRight    /* Right hand side of comparison */
){
  int nCmp = MIN(pLeft->n, nRight);
  int res = 0;
  /* Only call memcmp() when there is something to compare. If either
  ** side is empty its pointer may be NULL, and passing NULL to memcmp()
  ** is undefined behaviour even when the length is zero. */
  if( nCmp>0 ){
    res = memcmp(pLeft->p, pRight, nCmp);
  }
  return (res==0 ? (pLeft->n - nRight) : res);
}
#endif
592 
593 /*
594 ** Compare the contents of the two buffers using memcmp(). If one buffer
595 ** is a prefix of the other, it is considered the lesser.
596 **
597 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
598 ** +ve if pRight is smaller than pLeft. In other words:
599 **
600 **     res = *pLeft - *pRight
601 */
fts5BufferCompare(Fts5Buffer * pLeft,Fts5Buffer * pRight)602 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
603   int nCmp, res;
604   nCmp = MIN(pLeft->n, pRight->n);
605   assert( nCmp<=0 || pLeft->p!=0 );
606   assert( nCmp<=0 || pRight->p!=0 );
607   res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
608   return (res==0 ? (pLeft->n - pRight->n) : res);
609 }
610 
/*
** Decode and return the varint stored at byte offset pLeaf->szLeaf of the
** leaf page. According to the page format notes above, the page footer
** starts there and its first varint is the offset of the first term.
*/
static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
  u8 *aFooter = &pLeaf->p[pLeaf->szLeaf];
  int iTermOff;
  fts5GetVarint32(aFooter, iTermOff);
  return iTermOff;
}
616 
617 /*
618 ** Close the read-only blob handle, if it is open.
619 */
sqlite3Fts5IndexCloseReader(Fts5Index * p)620 void sqlite3Fts5IndexCloseReader(Fts5Index *p){
621   if( p->pReader ){
622     sqlite3_blob *pReader = p->pReader;
623     p->pReader = 0;
624     sqlite3_blob_close(pReader);
625   }
626 }
627 
628 /*
629 ** Retrieve a record from the %_data table.
630 **
631 ** If an error occurs, NULL is returned and an error left in the
632 ** Fts5Index object.
633 */
fts5DataRead(Fts5Index * p,i64 iRowid)634 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
635   Fts5Data *pRet = 0;
636   if( p->rc==SQLITE_OK ){
637     int rc = SQLITE_OK;
638 
639     if( p->pReader ){
640       /* This call may return SQLITE_ABORT if there has been a savepoint
641       ** rollback since it was last used. In this case a new blob handle
642       ** is required.  */
643       sqlite3_blob *pBlob = p->pReader;
644       p->pReader = 0;
645       rc = sqlite3_blob_reopen(pBlob, iRowid);
646       assert( p->pReader==0 );
647       p->pReader = pBlob;
648       if( rc!=SQLITE_OK ){
649         sqlite3Fts5IndexCloseReader(p);
650       }
651       if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
652     }
653 
654     /* If the blob handle is not open at this point, open it and seek
655     ** to the requested entry.  */
656     if( p->pReader==0 && rc==SQLITE_OK ){
657       Fts5Config *pConfig = p->pConfig;
658       rc = sqlite3_blob_open(pConfig->db,
659           pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
660       );
661     }
662 
663     /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
664     ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
665     ** All the reasons those functions might return SQLITE_ERROR - missing
666     ** table, missing row, non-blob/text in block column - indicate
667     ** backing store corruption.  */
668     if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
669 
670     if( rc==SQLITE_OK ){
671       u8 *aOut = 0;               /* Read blob data into this buffer */
672       int nByte = sqlite3_blob_bytes(p->pReader);
673       sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
674       pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
675       if( pRet ){
676         pRet->nn = nByte;
677         aOut = pRet->p = (u8*)&pRet[1];
678       }else{
679         rc = SQLITE_NOMEM;
680       }
681 
682       if( rc==SQLITE_OK ){
683         rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
684       }
685       if( rc!=SQLITE_OK ){
686         sqlite3_free(pRet);
687         pRet = 0;
688       }else{
689         /* TODO1: Fix this */
690         pRet->p[nByte] = 0x00;
691         pRet->p[nByte+1] = 0x00;
692         pRet->szLeaf = fts5GetU16(&pRet->p[2]);
693       }
694     }
695     p->rc = rc;
696     p->nRead++;
697   }
698 
699   assert( (pRet==0)==(p->rc!=SQLITE_OK) );
700   return pRet;
701 }
702 
703 
704 /*
705 ** Release a reference to data record returned by an earlier call to
706 ** fts5DataRead().
707 */
fts5DataRelease(Fts5Data * pData)708 static void fts5DataRelease(Fts5Data *pData){
709   sqlite3_free(pData);
710 }
711 
fts5LeafRead(Fts5Index * p,i64 iRowid)712 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
713   Fts5Data *pRet = fts5DataRead(p, iRowid);
714   if( pRet ){
715     if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
716       p->rc = FTS5_CORRUPT;
717       fts5DataRelease(pRet);
718       pRet = 0;
719     }
720   }
721   return pRet;
722 }
723 
/*
** Prepare SQL statement zSql and store the new statement handle in
** (*ppStmt). The statement is prepared with the SQLITE_PREPARE_PERSISTENT
** and SQLITE_PREPARE_NO_VTAB flags.
**
** If p->rc is not SQLITE_OK when this function is called, nothing is
** prepared. If zSql is NULL, p->rc is set to SQLITE_NOMEM. In all cases
** zSql is freed using sqlite3_free() before returning, and the return
** value is the final value of p->rc.
*/
static int fts5IndexPrepareStmt(
  Fts5Index *p,
  sqlite3_stmt **ppStmt,
  char *zSql
){
  if( p->rc==SQLITE_OK ){
    if( zSql==0 ){
      p->rc = SQLITE_NOMEM;
    }else{
      int prepFlags = SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB;
      p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1, prepFlags,
          ppStmt, 0);
    }
  }
  sqlite3_free(zSql);
  return p->rc;
}
741 
742 
743 /*
744 ** INSERT OR REPLACE a record into the %_data table.
745 */
fts5DataWrite(Fts5Index * p,i64 iRowid,const u8 * pData,int nData)746 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
747   if( p->rc!=SQLITE_OK ) return;
748 
749   if( p->pWriter==0 ){
750     Fts5Config *pConfig = p->pConfig;
751     fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
752           "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
753           pConfig->zDb, pConfig->zName
754     ));
755     if( p->rc ) return;
756   }
757 
758   sqlite3_bind_int64(p->pWriter, 1, iRowid);
759   sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
760   sqlite3_step(p->pWriter);
761   p->rc = sqlite3_reset(p->pWriter);
762   sqlite3_bind_null(p->pWriter, 2);
763 }
764 
765 /*
766 ** Execute the following SQL:
767 **
768 **     DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
769 */
fts5DataDelete(Fts5Index * p,i64 iFirst,i64 iLast)770 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
771   if( p->rc!=SQLITE_OK ) return;
772 
773   if( p->pDeleter==0 ){
774     Fts5Config *pConfig = p->pConfig;
775     char *zSql = sqlite3_mprintf(
776         "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
777           pConfig->zDb, pConfig->zName
778     );
779     if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
780   }
781 
782   sqlite3_bind_int64(p->pDeleter, 1, iFirst);
783   sqlite3_bind_int64(p->pDeleter, 2, iLast);
784   sqlite3_step(p->pDeleter);
785   p->rc = sqlite3_reset(p->pDeleter);
786 }
787 
788 /*
789 ** Remove all records associated with segment iSegid.
790 */
fts5DataRemoveSegment(Fts5Index * p,int iSegid)791 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
792   i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
793   i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
794   fts5DataDelete(p, iFirst, iLast);
795   if( p->pIdxDeleter==0 ){
796     Fts5Config *pConfig = p->pConfig;
797     fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
798           "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
799           pConfig->zDb, pConfig->zName
800     ));
801   }
802   if( p->rc==SQLITE_OK ){
803     sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
804     sqlite3_step(p->pIdxDeleter);
805     p->rc = sqlite3_reset(p->pIdxDeleter);
806   }
807 }
808 
809 /*
810 ** Release a reference to an Fts5Structure object returned by an earlier
811 ** call to fts5StructureRead() or fts5StructureDecode().
812 */
fts5StructureRelease(Fts5Structure * pStruct)813 static void fts5StructureRelease(Fts5Structure *pStruct){
814   if( pStruct && 0>=(--pStruct->nRef) ){
815     int i;
816     assert( pStruct->nRef==0 );
817     for(i=0; i<pStruct->nLevel; i++){
818       sqlite3_free(pStruct->aLevel[i].aSeg);
819     }
820     sqlite3_free(pStruct);
821   }
822 }
823 
/*
** Take an additional reference to structure object pStruct, which must
** not be NULL.
*/
static void fts5StructureRef(Fts5Structure *pStruct){
  pStruct->nRef += 1;
}
827 
/*
** Take a reference to the structure object currently attached to index
** p (p->pStruct) and return it as an opaque pointer. p->pStruct is
** dereferenced unconditionally, so it must not be NULL.
*/
void *sqlite3Fts5StructureRef(Fts5Index *p){
  Fts5Structure *pStruct = p->pStruct;
  fts5StructureRef(pStruct);
  return (void*)pStruct;
}
sqlite3Fts5StructureRelease(void * p)832 void sqlite3Fts5StructureRelease(void *p){
833   if( p ){
834     fts5StructureRelease((Fts5Structure*)p);
835   }
836 }
/*
** Return SQLITE_OK if opaque pointer pStruct refers to the same structure
** object that is currently attached to index p, or SQLITE_ABORT if it
** does not.
*/
int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
  return (p->pStruct==(Fts5Structure*)pStruct) ? SQLITE_OK : SQLITE_ABORT;
}
843 
844 /*
845 ** Ensure that structure object (*pp) is writable.
846 **
847 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
848 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
849 */
fts5StructureMakeWritable(int * pRc,Fts5Structure ** pp)850 static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
851   Fts5Structure *p = *pp;
852   if( *pRc==SQLITE_OK && p->nRef>1 ){
853     i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
854     Fts5Structure *pNew;
855     pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
856     if( pNew ){
857       int i;
858       memcpy(pNew, p, nByte);
859       for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
860       for(i=0; i<p->nLevel; i++){
861         Fts5StructureLevel *pLvl = &pNew->aLevel[i];
862         nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
863         pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
864         if( pLvl->aSeg==0 ){
865           for(i=0; i<p->nLevel; i++){
866             sqlite3_free(pNew->aLevel[i].aSeg);
867           }
868           sqlite3_free(pNew);
869           return;
870         }
871         memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
872       }
873       p->nRef--;
874       pNew->nRef = 1;
875     }
876     *pp = pNew;
877   }
878 }
879 
880 /*
881 ** Deserialize and return the structure record currently stored in serialized
882 ** form within buffer pData/nData.
883 **
884 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
885 ** are over-allocated by one slot. This allows the structure contents
886 ** to be more easily edited.
887 **
888 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
889 ** returned. Otherwise, *ppOut is set to point to the new object and
890 ** SQLITE_OK returned.
891 */
fts5StructureDecode(const u8 * pData,int nData,int * piCookie,Fts5Structure ** ppOut)892 static int fts5StructureDecode(
893   const u8 *pData,                /* Buffer containing serialized structure */
894   int nData,                      /* Size of buffer pData in bytes */
895   int *piCookie,                  /* Configuration cookie value */
896   Fts5Structure **ppOut           /* OUT: Deserialized object */
897 ){
898   int rc = SQLITE_OK;
899   int i = 0;
900   int iLvl;
901   int nLevel = 0;
902   int nSegment = 0;
903   sqlite3_int64 nByte;            /* Bytes of space to allocate at pRet */
904   Fts5Structure *pRet = 0;        /* Structure object to return */
905 
906   /* Grab the cookie value */
907   if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
908   i = 4;
909 
910   /* Read the total number of levels and segments from the start of the
911   ** structure record.  */
912   i += fts5GetVarint32(&pData[i], nLevel);
913   i += fts5GetVarint32(&pData[i], nSegment);
914   if( nLevel>FTS5_MAX_SEGMENT   || nLevel<0
915    || nSegment>FTS5_MAX_SEGMENT || nSegment<0
916   ){
917     return FTS5_CORRUPT;
918   }
919   nByte = (
920       sizeof(Fts5Structure) +                    /* Main structure */
921       sizeof(Fts5StructureLevel) * (nLevel-1)    /* aLevel[] array */
922   );
923   pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
924 
925   if( pRet ){
926     pRet->nRef = 1;
927     pRet->nLevel = nLevel;
928     pRet->nSegment = nSegment;
929     i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
930 
931     for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
932       Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
933       int nTotal = 0;
934       int iSeg;
935 
936       if( i>=nData ){
937         rc = FTS5_CORRUPT;
938       }else{
939         i += fts5GetVarint32(&pData[i], pLvl->nMerge);
940         i += fts5GetVarint32(&pData[i], nTotal);
941         if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
942         pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
943             nTotal * sizeof(Fts5StructureSegment)
944         );
945         nSegment -= nTotal;
946       }
947 
948       if( rc==SQLITE_OK ){
949         pLvl->nSeg = nTotal;
950         for(iSeg=0; iSeg<nTotal; iSeg++){
951           Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
952           if( i>=nData ){
953             rc = FTS5_CORRUPT;
954             break;
955           }
956           i += fts5GetVarint32(&pData[i], pSeg->iSegid);
957           i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
958           i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
959           if( pSeg->pgnoLast<pSeg->pgnoFirst ){
960             rc = FTS5_CORRUPT;
961             break;
962           }
963         }
964         if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
965         if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
966       }
967     }
968     if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
969 
970     if( rc!=SQLITE_OK ){
971       fts5StructureRelease(pRet);
972       pRet = 0;
973     }
974   }
975 
976   *ppOut = pRet;
977   return rc;
978 }
979 
980 /*
981 ** Add a level to the Fts5Structure.aLevel[] array of structure object
982 ** (*ppStruct).
983 */
fts5StructureAddLevel(int * pRc,Fts5Structure ** ppStruct)984 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
985   fts5StructureMakeWritable(pRc, ppStruct);
986   if( *pRc==SQLITE_OK ){
987     Fts5Structure *pStruct = *ppStruct;
988     int nLevel = pStruct->nLevel;
989     sqlite3_int64 nByte = (
990         sizeof(Fts5Structure) +                  /* Main structure */
991         sizeof(Fts5StructureLevel) * (nLevel+1)  /* aLevel[] array */
992     );
993 
994     pStruct = sqlite3_realloc64(pStruct, nByte);
995     if( pStruct ){
996       memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
997       pStruct->nLevel++;
998       *ppStruct = pStruct;
999     }else{
1000       *pRc = SQLITE_NOMEM;
1001     }
1002   }
1003 }
1004 
1005 /*
1006 ** Extend level iLvl so that there is room for at least nExtra more
1007 ** segments.
1008 */
fts5StructureExtendLevel(int * pRc,Fts5Structure * pStruct,int iLvl,int nExtra,int bInsert)1009 static void fts5StructureExtendLevel(
1010   int *pRc,
1011   Fts5Structure *pStruct,
1012   int iLvl,
1013   int nExtra,
1014   int bInsert
1015 ){
1016   if( *pRc==SQLITE_OK ){
1017     Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1018     Fts5StructureSegment *aNew;
1019     sqlite3_int64 nByte;
1020 
1021     nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
1022     aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
1023     if( aNew ){
1024       if( bInsert==0 ){
1025         memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
1026       }else{
1027         int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
1028         memmove(&aNew[nExtra], aNew, nMove);
1029         memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
1030       }
1031       pLvl->aSeg = aNew;
1032     }else{
1033       *pRc = SQLITE_NOMEM;
1034     }
1035   }
1036 }
1037 
fts5StructureReadUncached(Fts5Index * p)1038 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
1039   Fts5Structure *pRet = 0;
1040   Fts5Config *pConfig = p->pConfig;
1041   int iCookie;                    /* Configuration cookie */
1042   Fts5Data *pData;
1043 
1044   pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
1045   if( p->rc==SQLITE_OK ){
1046     /* TODO: Do we need this if the leaf-index is appended? Probably... */
1047     memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
1048     p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
1049     if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
1050       p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
1051     }
1052     fts5DataRelease(pData);
1053     if( p->rc!=SQLITE_OK ){
1054       fts5StructureRelease(pRet);
1055       pRet = 0;
1056     }
1057   }
1058 
1059   return pRet;
1060 }
1061 
fts5IndexDataVersion(Fts5Index * p)1062 static i64 fts5IndexDataVersion(Fts5Index *p){
1063   i64 iVersion = 0;
1064 
1065   if( p->rc==SQLITE_OK ){
1066     if( p->pDataVersion==0 ){
1067       p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
1068           sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
1069           );
1070       if( p->rc ) return 0;
1071     }
1072 
1073     if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
1074       iVersion = sqlite3_column_int64(p->pDataVersion, 0);
1075     }
1076     p->rc = sqlite3_reset(p->pDataVersion);
1077   }
1078 
1079   return iVersion;
1080 }
1081 
1082 /*
1083 ** Read, deserialize and return the structure record.
1084 **
1085 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
1086 ** are over-allocated as described for function fts5StructureDecode()
1087 ** above.
1088 **
1089 ** If an error occurs, NULL is returned and an error code left in the
1090 ** Fts5Index handle. If an error has already occurred when this function
1091 ** is called, it is a no-op.
1092 */
fts5StructureRead(Fts5Index * p)1093 static Fts5Structure *fts5StructureRead(Fts5Index *p){
1094 
1095   if( p->pStruct==0 ){
1096     p->iStructVersion = fts5IndexDataVersion(p);
1097     if( p->rc==SQLITE_OK ){
1098       p->pStruct = fts5StructureReadUncached(p);
1099     }
1100   }
1101 
1102 #if 0
1103   else{
1104     Fts5Structure *pTest = fts5StructureReadUncached(p);
1105     if( pTest ){
1106       int i, j;
1107       assert_nc( p->pStruct->nSegment==pTest->nSegment );
1108       assert_nc( p->pStruct->nLevel==pTest->nLevel );
1109       for(i=0; i<pTest->nLevel; i++){
1110         assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
1111         assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
1112         for(j=0; j<pTest->aLevel[i].nSeg; j++){
1113           Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
1114           Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
1115           assert_nc( p1->iSegid==p2->iSegid );
1116           assert_nc( p1->pgnoFirst==p2->pgnoFirst );
1117           assert_nc( p1->pgnoLast==p2->pgnoLast );
1118         }
1119       }
1120       fts5StructureRelease(pTest);
1121     }
1122   }
1123 #endif
1124 
1125   if( p->rc!=SQLITE_OK ) return 0;
1126   assert( p->iStructVersion!=0 );
1127   assert( p->pStruct!=0 );
1128   fts5StructureRef(p->pStruct);
1129   return p->pStruct;
1130 }
1131 
/*
** Discard the structure object cached in p->pStruct, if there is one. The
** cache's reference to the object is released and p->pStruct set to NULL.
*/
static void fts5StructureInvalidate(Fts5Index *p){
  if( p->pStruct==0 ) return;
  fts5StructureRelease(p->pStruct);
  p->pStruct = 0;
}
1138 
/*
** Return the sum of the nSeg values of all levels of index structure
** pStruct, or zero if pStruct is NULL. This function is only compiled
** when SQLITE_DEBUG is defined and is only ever used as part of assert()
** conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int i;                          /* Used to iterate through levels */

  if( pStruct==0 ) return 0;
  for(i=0; i<pStruct->nLevel; i++){
    nTotal += pStruct->aLevel[i].nSeg;
  }
  return nTotal;
}
#endif
1156 
/*
** Append nBlob bytes from pBlob to buffer pBuf without growing the buffer.
** The caller must already have ensured that the buffer has enough space -
** this is only checked by an assert(). Being a macro, the pBuf and nBlob
** arguments are evaluated more than once.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) );             \
  memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob);             \
  (pBuf)->n += nBlob;                                      \
}
1162 
/*
** Append integer iVal, encoded as a varint, to buffer pBuf without growing
** the buffer. The caller must already have ensured that the buffer has
** enough space. The assert() runs only after the varint has been written,
** so it cannot prevent an overrun. The pBuf argument is evaluated more
** than once.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}
1167 
1168 
1169 /*
1170 ** Serialize and store the "structure" record.
1171 **
1172 ** If an error occurs, leave an error code in the Fts5Index object. If an
1173 ** error has already occurred, this function is a no-op.
1174 */
fts5StructureWrite(Fts5Index * p,Fts5Structure * pStruct)1175 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
1176   if( p->rc==SQLITE_OK ){
1177     Fts5Buffer buf;               /* Buffer to serialize record into */
1178     int iLvl;                     /* Used to iterate through levels */
1179     int iCookie;                  /* Cookie value to store */
1180 
1181     assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
1182     memset(&buf, 0, sizeof(Fts5Buffer));
1183 
1184     /* Append the current configuration cookie */
1185     iCookie = p->pConfig->iCookie;
1186     if( iCookie<0 ) iCookie = 0;
1187 
1188     if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
1189       sqlite3Fts5Put32(buf.p, iCookie);
1190       buf.n = 4;
1191       fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
1192       fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
1193       fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
1194     }
1195 
1196     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
1197       int iSeg;                     /* Used to iterate through segments */
1198       Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
1199       fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
1200       fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
1201       assert( pLvl->nMerge<=pLvl->nSeg );
1202 
1203       for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
1204         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
1205         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
1206         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
1207       }
1208     }
1209 
1210     fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
1211     fts5BufferFree(&buf);
1212   }
1213 }
1214 
/*
** Debugging aid. The real implementation, which prints a caption followed
** by a rendering of the structure object to stdout, is disabled by the
** "#if 0". As compiled, fts5PrintStructure() is a macro that expands to
** nothing, so calls to it have no effect.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
1229 
/*
** Return the number of pages spanned by segment pSeg, counting both its
** first and last page.
*/
static int fts5SegmentSize(Fts5StructureSegment *pSeg){
  int nPage = 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
  return nPage;
}
1233 
1234 /*
1235 ** Return a copy of index structure pStruct. Except, promote as many
1236 ** segments as possible to level iPromote. If an OOM occurs, NULL is
1237 ** returned.
1238 */
fts5StructurePromoteTo(Fts5Index * p,int iPromote,int szPromote,Fts5Structure * pStruct)1239 static void fts5StructurePromoteTo(
1240   Fts5Index *p,
1241   int iPromote,
1242   int szPromote,
1243   Fts5Structure *pStruct
1244 ){
1245   int il, is;
1246   Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
1247 
1248   if( pOut->nMerge==0 ){
1249     for(il=iPromote+1; il<pStruct->nLevel; il++){
1250       Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
1251       if( pLvl->nMerge ) return;
1252       for(is=pLvl->nSeg-1; is>=0; is--){
1253         int sz = fts5SegmentSize(&pLvl->aSeg[is]);
1254         if( sz>szPromote ) return;
1255         fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
1256         if( p->rc ) return;
1257         memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
1258         pOut->nSeg++;
1259         pLvl->nSeg--;
1260       }
1261     }
1262   }
1263 }
1264 
1265 /*
1266 ** A new segment has just been written to level iLvl of index structure
1267 ** pStruct. This function determines if any segments should be promoted
1268 ** as a result. Segments are promoted in two scenarios:
1269 **
1270 **   a) If the segment just written is smaller than one or more segments
1271 **      within the previous populated level, it is promoted to the previous
1272 **      populated level.
1273 **
1274 **   b) If the segment just written is larger than the newest segment on
1275 **      the next populated level, then that segment, and any other adjacent
1276 **      segments that are also smaller than the one just written, are
1277 **      promoted.
1278 **
1279 ** If one or more segments are promoted, the structure object is updated
1280 ** to reflect this.
1281 */
fts5StructurePromote(Fts5Index * p,int iLvl,Fts5Structure * pStruct)1282 static void fts5StructurePromote(
1283   Fts5Index *p,                   /* FTS5 backend object */
1284   int iLvl,                       /* Index level just updated */
1285   Fts5Structure *pStruct          /* Index structure */
1286 ){
1287   if( p->rc==SQLITE_OK ){
1288     int iTst;
1289     int iPromote = -1;
1290     int szPromote = 0;            /* Promote anything this size or smaller */
1291     Fts5StructureSegment *pSeg;   /* Segment just written */
1292     int szSeg;                    /* Size of segment just written */
1293     int nSeg = pStruct->aLevel[iLvl].nSeg;
1294 
1295     if( nSeg==0 ) return;
1296     pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
1297     szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
1298 
1299     /* Check for condition (a) */
1300     for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
1301     if( iTst>=0 ){
1302       int i;
1303       int szMax = 0;
1304       Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
1305       assert( pTst->nMerge==0 );
1306       for(i=0; i<pTst->nSeg; i++){
1307         int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
1308         if( sz>szMax ) szMax = sz;
1309       }
1310       if( szMax>=szSeg ){
1311         /* Condition (a) is true. Promote the newest segment on level
1312         ** iLvl to level iTst.  */
1313         iPromote = iTst;
1314         szPromote = szMax;
1315       }
1316     }
1317 
1318     /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
1319     ** is a no-op if it is not.  */
1320     if( iPromote<0 ){
1321       iPromote = iLvl;
1322       szPromote = szSeg;
1323     }
1324     fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
1325   }
1326 }
1327 
1328 
1329 /*
1330 ** Advance the iterator passed as the only argument. If the end of the
1331 ** doclist-index page is reached, return non-zero.
1332 */
fts5DlidxLvlNext(Fts5DlidxLvl * pLvl)1333 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
1334   Fts5Data *pData = pLvl->pData;
1335 
1336   if( pLvl->iOff==0 ){
1337     assert( pLvl->bEof==0 );
1338     pLvl->iOff = 1;
1339     pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
1340     pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
1341     pLvl->iFirstOff = pLvl->iOff;
1342   }else{
1343     int iOff;
1344     for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
1345       if( pData->p[iOff] ) break;
1346     }
1347 
1348     if( iOff<pData->nn ){
1349       i64 iVal;
1350       pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
1351       iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
1352       pLvl->iRowid += iVal;
1353       pLvl->iOff = iOff;
1354     }else{
1355       pLvl->bEof = 1;
1356     }
1357   }
1358 
1359   return pLvl->bEof;
1360 }
1361 
1362 /*
1363 ** Advance the iterator passed as the only argument.
1364 */
fts5DlidxIterNextR(Fts5Index * p,Fts5DlidxIter * pIter,int iLvl)1365 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
1366   Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
1367 
1368   assert( iLvl<pIter->nLvl );
1369   if( fts5DlidxLvlNext(pLvl) ){
1370     if( (iLvl+1) < pIter->nLvl ){
1371       fts5DlidxIterNextR(p, pIter, iLvl+1);
1372       if( pLvl[1].bEof==0 ){
1373         fts5DataRelease(pLvl->pData);
1374         memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1375         pLvl->pData = fts5DataRead(p,
1376             FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
1377         );
1378         if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
1379       }
1380     }
1381   }
1382 
1383   return pIter->aLvl[0].bEof;
1384 }
/*
** Advance doclist-index iterator pIter to the next entry, starting at
** level 0. Return non-zero if level 0 is at EOF afterwards.
*/
static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
  int bEof = fts5DlidxIterNextR(p, pIter, 0);
  return bEof;
}
1388 
1389 /*
1390 ** The iterator passed as the first argument has the following fields set
1391 ** as follows. This function sets up the rest of the iterator so that it
1392 ** points to the first rowid in the doclist-index.
1393 **
1394 **   pData:
1395 **     pointer to doclist-index record,
1396 **
1397 ** When this function is called pIter->iLeafPgno is the page number the
1398 ** doclist is associated with (the one featuring the term).
1399 */
fts5DlidxIterFirst(Fts5DlidxIter * pIter)1400 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
1401   int i;
1402   for(i=0; i<pIter->nLvl; i++){
1403     fts5DlidxLvlNext(&pIter->aLvl[i]);
1404   }
1405   return pIter->aLvl[0].bEof;
1406 }
1407 
1408 
/*
** Return 1 if an error has occurred (p->rc is set) or if level 0 of
** doclist-index iterator pIter is at EOF. Otherwise return 0.
*/
static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
  if( p->rc!=SQLITE_OK ) return 1;
  return pIter->aLvl[0].bEof!=0;
}
1412 
/*
** Position doclist-index iterator pIter on its last entry.
**
** Working from the topmost level down, each level is advanced until it
** hits the end of its page and its EOF flag is then cleared, leaving it on
** the last entry of that page. The page that entry refers to is then
** loaded into the level below. Processing stops early if an error occurs
** (p->rc is set).
*/
static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
  int iLvl = pIter->nLvl-1;

  /* Advance each level to the last entry on the last page */
  while( p->rc==SQLITE_OK && iLvl>=0 ){
    Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

    while( fts5DlidxLvlNext(pLvl)==0 ){
      /* Keep going until the end of the page is reached */
    }
    pLvl->bEof = 0;

    if( iLvl>0 ){
      Fts5DlidxLvl *pChild = &pIter->aLvl[iLvl-1];
      fts5DataRelease(pChild->pData);
      memset(pChild, 0, sizeof(Fts5DlidxLvl));
      pChild->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl-1, pLvl->iLeafPgno)
      );
    }
    iLvl--;
  }
}
1432 
/*
** Move the doclist-index level iterator passed as the only argument to
** the previous entry on its page. The iterator must not be at EOF.
**
** If the iterator is already at or before the first entry on the page
** (iOff<=iFirstOff), the EOF flag is set. Otherwise, the varint that
** immediately precedes the current offset is located and subtracted from
** the current rowid, and the leaf page number is decremented by one, plus
** one more for each 0x00 placeholder byte found in front of that varint.
**
** Return non-zero if the iterator is at EOF after the move.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block
    ** decrements iOff until it points to the first byte of the previous
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      if( bZero==0 ) nZero--;
    }

    /* Each 0x00 placeholder skipped stands for one more leaf page. The new
    ** offset is left pointing at the first of them. */
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}
1484 
/*
** Move level iLvl of doclist-index iterator pIter to its previous entry.
**
** If level iLvl runs off the start of its page and a level above it
** exists, that level is moved back recursively. If it is then not at EOF,
** the page it now points to is loaded into level iLvl, which is
** positioned on the last entry of the new page.
**
** Return non-zero if level 0 of the iterator is at EOF.
*/
static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
  Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];

  assert( iLvl<pIter->nLvl );
  if( fts5DlidxLvlPrev(pLvl) && (iLvl+1)<pIter->nLvl ){
    Fts5DlidxLvl *pParent = &pLvl[1];
    fts5DlidxIterPrevR(p, pIter, iLvl+1);
    if( pParent->bEof==0 ){
      fts5DataRelease(pLvl->pData);
      memset(pLvl, 0, sizeof(Fts5DlidxLvl));
      pLvl->pData = fts5DataRead(p,
          FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pParent->iLeafPgno)
      );
      if( pLvl->pData ){
        /* Run forward to the end of the page, then clear the EOF flag so
        ** that the level is left on the last entry. */
        while( fts5DlidxLvlNext(pLvl)==0 ){
        }
        pLvl->bEof = 0;
      }
    }
  }

  return pIter->aLvl[0].bEof;
}
/*
** Move doclist-index iterator pIter to the previous entry, starting at
** level 0. Return non-zero if level 0 is at EOF afterwards.
*/
static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
  int bEof = fts5DlidxIterPrevR(p, pIter, 0);
  return bEof;
}
1511 
1512 /*
1513 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
1514 */
fts5DlidxIterFree(Fts5DlidxIter * pIter)1515 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
1516   if( pIter ){
1517     int i;
1518     for(i=0; i<pIter->nLvl; i++){
1519       fts5DataRelease(pIter->aLvl[i].pData);
1520     }
1521     sqlite3_free(pIter);
1522   }
1523 }
1524 
fts5DlidxIterInit(Fts5Index * p,int bRev,int iSegid,int iLeafPg)1525 static Fts5DlidxIter *fts5DlidxIterInit(
1526   Fts5Index *p,                   /* Fts5 Backend to iterate within */
1527   int bRev,                       /* True for ORDER BY ASC */
1528   int iSegid,                     /* Segment id */
1529   int iLeafPg                     /* Leaf page number to load dlidx for */
1530 ){
1531   Fts5DlidxIter *pIter = 0;
1532   int i;
1533   int bDone = 0;
1534 
1535   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
1536     sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
1537     Fts5DlidxIter *pNew;
1538 
1539     pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
1540     if( pNew==0 ){
1541       p->rc = SQLITE_NOMEM;
1542     }else{
1543       i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
1544       Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
1545       pIter = pNew;
1546       memset(pLvl, 0, sizeof(Fts5DlidxLvl));
1547       pLvl->pData = fts5DataRead(p, iRowid);
1548       if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
1549         bDone = 1;
1550       }
1551       pIter->nLvl = i+1;
1552     }
1553   }
1554 
1555   if( p->rc==SQLITE_OK ){
1556     pIter->iSegid = iSegid;
1557     if( bRev==0 ){
1558       fts5DlidxIterFirst(pIter);
1559     }else{
1560       fts5DlidxIterLast(p, pIter);
1561     }
1562   }
1563 
1564   if( p->rc!=SQLITE_OK ){
1565     fts5DlidxIterFree(pIter);
1566     pIter = 0;
1567   }
1568 
1569   return pIter;
1570 }
1571 
fts5DlidxIterRowid(Fts5DlidxIter * pIter)1572 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
1573   return pIter->aLvl[0].iRowid;
1574 }
/*
** Return the leaf page number that level 0 of doclist-index iterator
** pIter currently points to.
*/
static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
  Fts5DlidxLvl *pLvl0 = &pIter->aLvl[0];
  return pLvl0->iLeafPgno;
}
1578 
1579 /*
1580 ** Load the next leaf page into the segment iterator.
1581 */
fts5SegIterNextPage(Fts5Index * p,Fts5SegIter * pIter)1582 static void fts5SegIterNextPage(
1583   Fts5Index *p,                   /* FTS5 backend object */
1584   Fts5SegIter *pIter              /* Iterator to advance to next page */
1585 ){
1586   Fts5Data *pLeaf;
1587   Fts5StructureSegment *pSeg = pIter->pSeg;
1588   fts5DataRelease(pIter->pLeaf);
1589   pIter->iLeafPgno++;
1590   if( pIter->pNextLeaf ){
1591     pIter->pLeaf = pIter->pNextLeaf;
1592     pIter->pNextLeaf = 0;
1593   }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
1594     pIter->pLeaf = fts5LeafRead(p,
1595         FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
1596     );
1597   }else{
1598     pIter->pLeaf = 0;
1599   }
1600   pLeaf = pIter->pLeaf;
1601 
1602   if( pLeaf ){
1603     pIter->iPgidxOff = pLeaf->szLeaf;
1604     if( fts5LeafIsTermless(pLeaf) ){
1605       pIter->iEndofDoclist = pLeaf->nn+1;
1606     }else{
1607       pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
1608           pIter->iEndofDoclist
1609       );
1610     }
1611   }
1612 }
1613 
1614 /*
1615 ** Argument p points to a buffer containing a varint to be interpreted as a
1616 ** position list size field. Read the varint and return the number of bytes
1617 ** read. Before returning, set *pnSz to the number of bytes in the position
1618 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
1619 */
fts5GetPoslistSize(const u8 * p,int * pnSz,int * pbDel)1620 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
1621   int nSz;
1622   int n = 0;
1623   fts5FastGetVarint32(p, n, nSz);
1624   assert_nc( nSz>=0 );
1625   *pnSz = nSz/2;
1626   *pbDel = nSz & 0x0001;
1627   return n;
1628 }
1629 
1630 /*
1631 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
1632 ** position-list size field. Read the value of the field and store it
1633 ** in the following variables:
1634 **
1635 **   Fts5SegIter.nPos
1636 **   Fts5SegIter.bDel
1637 **
1638 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
1639 ** position list content (if any).
1640 */
fts5SegIterLoadNPos(Fts5Index * p,Fts5SegIter * pIter)1641 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
1642   if( p->rc==SQLITE_OK ){
1643     int iOff = pIter->iLeafOffset;  /* Offset to read at */
1644     ASSERT_SZLEAF_OK(pIter->pLeaf);
1645     if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1646       int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
1647       pIter->bDel = 0;
1648       pIter->nPos = 1;
1649       if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1650         pIter->bDel = 1;
1651         iOff++;
1652         if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
1653           pIter->nPos = 1;
1654           iOff++;
1655         }else{
1656           pIter->nPos = 0;
1657         }
1658       }
1659     }else{
1660       int nSz;
1661       fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
1662       pIter->bDel = (nSz & 0x0001);
1663       pIter->nPos = nSz>>1;
1664       assert_nc( pIter->nPos>=0 );
1665     }
1666     pIter->iLeafOffset = iOff;
1667   }
1668 }
1669 
/*
** Read a rowid varint from the current leaf at Fts5SegIter.iLeafOffset,
** store it in Fts5SegIter.iRowid and leave iLeafOffset pointing at the
** byte following the varint.
**
** If iLeafOffset is at or past the end of the leaf content (szLeaf), the
** next leaf page is loaded first and the rowid read from byte offset 4 of
** that page. If there is no next page, p->rc is set to FTS5_CORRUPT
** (unless it already holds an error) and the iterator is not advanced.
*/
static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
  Fts5Data *pLeaf = pIter->pLeaf; /* Leaf to read from */
  i64 iOff = pIter->iLeafOffset;  /* Offset to read at */
  u8 *a = pLeaf->p;               /* Buffer to read data from */

  ASSERT_SZLEAF_OK(pLeaf);
  if( iOff>=pLeaf->szLeaf ){
    fts5SegIterNextPage(p, pIter);
    pLeaf = pIter->pLeaf;
    if( pLeaf==0 ){
      if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
      return;
    }
    a = pLeaf->p;
    iOff = 4;
  }
  iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
  pIter->iLeafOffset = iOff;
}
1687 
1688 /*
1689 ** Fts5SegIter.iLeafOffset currently points to the first byte of the
1690 ** "nSuffix" field of a term. Function parameter nKeep contains the value
1691 ** of the "nPrefix" field (if there was one - it is passed 0 if this is
1692 ** the first term in the segment).
1693 **
1694 ** This function populates:
1695 **
1696 **   Fts5SegIter.term
1697 **   Fts5SegIter.rowid
1698 **
1699 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of
1700 ** the first position list. The position list belonging to document
1701 ** (Fts5SegIter.iRowid).
1702 */
fts5SegIterLoadTerm(Fts5Index * p,Fts5SegIter * pIter,int nKeep)1703 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
1704   u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
1705   i64 iOff = pIter->iLeafOffset;  /* Offset to read at */
1706   int nNew;                       /* Bytes of new data */
1707 
1708   iOff += fts5GetVarint32(&a[iOff], nNew);
1709   if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
1710     p->rc = FTS5_CORRUPT;
1711     return;
1712   }
1713   pIter->term.n = nKeep;
1714   fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
1715   assert( pIter->term.n<=pIter->term.nSpace );
1716   iOff += nNew;
1717   pIter->iTermLeafOffset = iOff;
1718   pIter->iTermLeafPgno = pIter->iLeafPgno;
1719   pIter->iLeafOffset = iOff;
1720 
1721   if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
1722     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1723   }else{
1724     int nExtra;
1725     pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
1726     pIter->iEndofDoclist += nExtra;
1727   }
1728 
1729   fts5SegIterLoadRowid(p, pIter);
1730 }
1731 
1732 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
1733 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
1734 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
1735 
fts5SegIterSetNext(Fts5Index * p,Fts5SegIter * pIter)1736 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
1737   if( pIter->flags & FTS5_SEGITER_REVERSE ){
1738     pIter->xNext = fts5SegIterNext_Reverse;
1739   }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
1740     pIter->xNext = fts5SegIterNext_None;
1741   }else{
1742     pIter->xNext = fts5SegIterNext;
1743   }
1744 }
1745 
1746 /*
1747 ** Initialize the iterator object pIter to iterate through the entries in
1748 ** segment pSeg. The iterator is left pointing to the first entry when
1749 ** this function returns.
1750 **
1751 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
1752 ** an error has already occurred when this function is called, it is a no-op.
1753 */
fts5SegIterInit(Fts5Index * p,Fts5StructureSegment * pSeg,Fts5SegIter * pIter)1754 static void fts5SegIterInit(
1755   Fts5Index *p,                   /* FTS index object */
1756   Fts5StructureSegment *pSeg,     /* Description of segment */
1757   Fts5SegIter *pIter              /* Object to populate */
1758 ){
1759   if( pSeg->pgnoFirst==0 ){
1760     /* This happens if the segment is being used as an input to an incremental
1761     ** merge and all data has already been "trimmed". See function
1762     ** fts5TrimSegments() for details. In this case leave the iterator empty.
1763     ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
1764     ** at EOF already. */
1765     assert( pIter->pLeaf==0 );
1766     return;
1767   }
1768 
1769   if( p->rc==SQLITE_OK ){
1770     memset(pIter, 0, sizeof(*pIter));
1771     fts5SegIterSetNext(p, pIter);
1772     pIter->pSeg = pSeg;
1773     pIter->iLeafPgno = pSeg->pgnoFirst-1;
1774     fts5SegIterNextPage(p, pIter);
1775   }
1776 
1777   if( p->rc==SQLITE_OK ){
1778     pIter->iLeafOffset = 4;
1779     assert( pIter->pLeaf!=0 );
1780     assert_nc( pIter->pLeaf->nn>4 );
1781     assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
1782     pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
1783     fts5SegIterLoadTerm(p, pIter, 0);
1784     fts5SegIterLoadNPos(p, pIter);
1785   }
1786 }
1787 
1788 /*
1789 ** This function is only ever called on iterators created by calls to
1790 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
1791 **
1792 ** The iterator is in an unusual state when this function is called: the
1793 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
1794 ** the position-list size field for the first relevant rowid on the page.
1795 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
1796 **
1797 ** This function advances the iterator so that it points to the last
1798 ** relevant rowid on the page and, if necessary, initializes the
1799 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
1800 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
1801 ** byte of the position list content associated with said rowid.
1802 */
fts5SegIterReverseInitPage(Fts5Index * p,Fts5SegIter * pIter)1803 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
1804   int eDetail = p->pConfig->eDetail;
1805   int n = pIter->pLeaf->szLeaf;
1806   int i = pIter->iLeafOffset;
1807   u8 *a = pIter->pLeaf->p;
1808   int iRowidOffset = 0;
1809 
1810   if( n>pIter->iEndofDoclist ){
1811     n = pIter->iEndofDoclist;
1812   }
1813 
1814   ASSERT_SZLEAF_OK(pIter->pLeaf);
1815   while( 1 ){
1816     u64 iDelta = 0;
1817 
1818     if( eDetail==FTS5_DETAIL_NONE ){
1819       /* todo */
1820       if( i<n && a[i]==0 ){
1821         i++;
1822         if( i<n && a[i]==0 ) i++;
1823       }
1824     }else{
1825       int nPos;
1826       int bDummy;
1827       i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
1828       i += nPos;
1829     }
1830     if( i>=n ) break;
1831     i += fts5GetVarint(&a[i], &iDelta);
1832     pIter->iRowid += iDelta;
1833 
1834     /* If necessary, grow the pIter->aRowidOffset[] array. */
1835     if( iRowidOffset>=pIter->nRowidOffset ){
1836       int nNew = pIter->nRowidOffset + 8;
1837       int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
1838       if( aNew==0 ){
1839         p->rc = SQLITE_NOMEM;
1840         break;
1841       }
1842       pIter->aRowidOffset = aNew;
1843       pIter->nRowidOffset = nNew;
1844     }
1845 
1846     pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
1847     pIter->iLeafOffset = i;
1848   }
1849   pIter->iRowidOffset = iRowidOffset;
1850   fts5SegIterLoadNPos(p, pIter);
1851 }
1852 
1853 /*
1854 **
1855 */
fts5SegIterReverseNewPage(Fts5Index * p,Fts5SegIter * pIter)1856 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
1857   assert( pIter->flags & FTS5_SEGITER_REVERSE );
1858   assert( pIter->flags & FTS5_SEGITER_ONETERM );
1859 
1860   fts5DataRelease(pIter->pLeaf);
1861   pIter->pLeaf = 0;
1862   while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
1863     Fts5Data *pNew;
1864     pIter->iLeafPgno--;
1865     pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
1866           pIter->pSeg->iSegid, pIter->iLeafPgno
1867     ));
1868     if( pNew ){
1869       /* iTermLeafOffset may be equal to szLeaf if the term is the last
1870       ** thing on the page - i.e. the first rowid is on the following page.
1871       ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
1872       if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
1873         assert( pIter->pLeaf==0 );
1874         if( pIter->iTermLeafOffset<pNew->szLeaf ){
1875           pIter->pLeaf = pNew;
1876           pIter->iLeafOffset = pIter->iTermLeafOffset;
1877         }
1878       }else{
1879         int iRowidOff;
1880         iRowidOff = fts5LeafFirstRowidOff(pNew);
1881         if( iRowidOff ){
1882           if( iRowidOff>=pNew->szLeaf ){
1883             p->rc = FTS5_CORRUPT;
1884           }else{
1885             pIter->pLeaf = pNew;
1886             pIter->iLeafOffset = iRowidOff;
1887           }
1888         }
1889       }
1890 
1891       if( pIter->pLeaf ){
1892         u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
1893         pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
1894         break;
1895       }else{
1896         fts5DataRelease(pNew);
1897       }
1898     }
1899   }
1900 
1901   if( pIter->pLeaf ){
1902     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
1903     fts5SegIterReverseInitPage(p, pIter);
1904   }
1905 }
1906 
1907 /*
1908 ** Return true if the iterator passed as the second argument currently
1909 ** points to a delete marker. A delete marker is an entry with a 0 byte
1910 ** position-list.
1911 */
fts5MultiIterIsEmpty(Fts5Index * p,Fts5Iter * pIter)1912 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
1913   Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
1914   return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
1915 }
1916 
1917 /*
1918 ** Advance iterator pIter to the next entry.
1919 **
1920 ** This version of fts5SegIterNext() is only used by reverse iterators.
1921 */
fts5SegIterNext_Reverse(Fts5Index * p,Fts5SegIter * pIter,int * pbUnused)1922 static void fts5SegIterNext_Reverse(
1923   Fts5Index *p,                   /* FTS5 backend object */
1924   Fts5SegIter *pIter,             /* Iterator to advance */
1925   int *pbUnused                   /* Unused */
1926 ){
1927   assert( pIter->flags & FTS5_SEGITER_REVERSE );
1928   assert( pIter->pNextLeaf==0 );
1929   UNUSED_PARAM(pbUnused);
1930 
1931   if( pIter->iRowidOffset>0 ){
1932     u8 *a = pIter->pLeaf->p;
1933     int iOff;
1934     u64 iDelta;
1935 
1936     pIter->iRowidOffset--;
1937     pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
1938     fts5SegIterLoadNPos(p, pIter);
1939     iOff = pIter->iLeafOffset;
1940     if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
1941       iOff += pIter->nPos;
1942     }
1943     fts5GetVarint(&a[iOff], &iDelta);
1944     pIter->iRowid -= iDelta;
1945   }else{
1946     fts5SegIterReverseNewPage(p, pIter);
1947   }
1948 }
1949 
1950 /*
1951 ** Advance iterator pIter to the next entry.
1952 **
1953 ** This version of fts5SegIterNext() is only used if detail=none and the
1954 ** iterator is not a reverse direction iterator.
1955 */
fts5SegIterNext_None(Fts5Index * p,Fts5SegIter * pIter,int * pbNewTerm)1956 static void fts5SegIterNext_None(
1957   Fts5Index *p,                   /* FTS5 backend object */
1958   Fts5SegIter *pIter,             /* Iterator to advance */
1959   int *pbNewTerm                  /* OUT: Set for new term */
1960 ){
1961   int iOff;
1962 
1963   assert( p->rc==SQLITE_OK );
1964   assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
1965   assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
1966 
1967   ASSERT_SZLEAF_OK(pIter->pLeaf);
1968   iOff = pIter->iLeafOffset;
1969 
1970   /* Next entry is on the next page */
1971   if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
1972     fts5SegIterNextPage(p, pIter);
1973     if( p->rc || pIter->pLeaf==0 ) return;
1974     pIter->iRowid = 0;
1975     iOff = 4;
1976   }
1977 
1978   if( iOff<pIter->iEndofDoclist ){
1979     /* Next entry is on the current page */
1980     i64 iDelta;
1981     iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
1982     pIter->iLeafOffset = iOff;
1983     pIter->iRowid += iDelta;
1984   }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
1985     if( pIter->pSeg ){
1986       int nKeep = 0;
1987       if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
1988         iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
1989       }
1990       pIter->iLeafOffset = iOff;
1991       fts5SegIterLoadTerm(p, pIter, nKeep);
1992     }else{
1993       const u8 *pList = 0;
1994       const char *zTerm = 0;
1995       int nList;
1996       sqlite3Fts5HashScanNext(p->pHash);
1997       sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
1998       if( pList==0 ) goto next_none_eof;
1999       pIter->pLeaf->p = (u8*)pList;
2000       pIter->pLeaf->nn = nList;
2001       pIter->pLeaf->szLeaf = nList;
2002       pIter->iEndofDoclist = nList;
2003       sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
2004       pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2005     }
2006 
2007     if( pbNewTerm ) *pbNewTerm = 1;
2008   }else{
2009     goto next_none_eof;
2010   }
2011 
2012   fts5SegIterLoadNPos(p, pIter);
2013 
2014   return;
2015  next_none_eof:
2016   fts5DataRelease(pIter->pLeaf);
2017   pIter->pLeaf = 0;
2018 }
2019 
2020 
2021 /*
2022 ** Advance iterator pIter to the next entry.
2023 **
2024 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
2025 ** is not considered an error if the iterator reaches EOF. If an error has
2026 ** already occurred when this function is called, it is a no-op.
2027 */
fts5SegIterNext(Fts5Index * p,Fts5SegIter * pIter,int * pbNewTerm)2028 static void fts5SegIterNext(
2029   Fts5Index *p,                   /* FTS5 backend object */
2030   Fts5SegIter *pIter,             /* Iterator to advance */
2031   int *pbNewTerm                  /* OUT: Set for new term */
2032 ){
2033   Fts5Data *pLeaf = pIter->pLeaf;
2034   int iOff;
2035   int bNewTerm = 0;
2036   int nKeep = 0;
2037   u8 *a;
2038   int n;
2039 
2040   assert( pbNewTerm==0 || *pbNewTerm==0 );
2041   assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
2042 
2043   /* Search for the end of the position list within the current page. */
2044   a = pLeaf->p;
2045   n = pLeaf->szLeaf;
2046 
2047   ASSERT_SZLEAF_OK(pLeaf);
2048   iOff = pIter->iLeafOffset + pIter->nPos;
2049 
2050   if( iOff<n ){
2051     /* The next entry is on the current page. */
2052     assert_nc( iOff<=pIter->iEndofDoclist );
2053     if( iOff>=pIter->iEndofDoclist ){
2054       bNewTerm = 1;
2055       if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
2056         iOff += fts5GetVarint32(&a[iOff], nKeep);
2057       }
2058     }else{
2059       u64 iDelta;
2060       iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
2061       pIter->iRowid += iDelta;
2062       assert_nc( iDelta>0 );
2063     }
2064     pIter->iLeafOffset = iOff;
2065 
2066   }else if( pIter->pSeg==0 ){
2067     const u8 *pList = 0;
2068     const char *zTerm = 0;
2069     int nList = 0;
2070     assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
2071     if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
2072       sqlite3Fts5HashScanNext(p->pHash);
2073       sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
2074     }
2075     if( pList==0 ){
2076       fts5DataRelease(pIter->pLeaf);
2077       pIter->pLeaf = 0;
2078     }else{
2079       pIter->pLeaf->p = (u8*)pList;
2080       pIter->pLeaf->nn = nList;
2081       pIter->pLeaf->szLeaf = nList;
2082       pIter->iEndofDoclist = nList+1;
2083       sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
2084           (u8*)zTerm);
2085       pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
2086       *pbNewTerm = 1;
2087     }
2088   }else{
2089     iOff = 0;
2090     /* Next entry is not on the current page */
2091     while( iOff==0 ){
2092       fts5SegIterNextPage(p, pIter);
2093       pLeaf = pIter->pLeaf;
2094       if( pLeaf==0 ) break;
2095       ASSERT_SZLEAF_OK(pLeaf);
2096       if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
2097         iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
2098         pIter->iLeafOffset = iOff;
2099 
2100         if( pLeaf->nn>pLeaf->szLeaf ){
2101           pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2102               &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
2103           );
2104         }
2105       }
2106       else if( pLeaf->nn>pLeaf->szLeaf ){
2107         pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
2108             &pLeaf->p[pLeaf->szLeaf], iOff
2109         );
2110         pIter->iLeafOffset = iOff;
2111         pIter->iEndofDoclist = iOff;
2112         bNewTerm = 1;
2113       }
2114       assert_nc( iOff<pLeaf->szLeaf );
2115       if( iOff>pLeaf->szLeaf ){
2116         p->rc = FTS5_CORRUPT;
2117         return;
2118       }
2119     }
2120   }
2121 
2122   /* Check if the iterator is now at EOF. If so, return early. */
2123   if( pIter->pLeaf ){
2124     if( bNewTerm ){
2125       if( pIter->flags & FTS5_SEGITER_ONETERM ){
2126         fts5DataRelease(pIter->pLeaf);
2127         pIter->pLeaf = 0;
2128       }else{
2129         fts5SegIterLoadTerm(p, pIter, nKeep);
2130         fts5SegIterLoadNPos(p, pIter);
2131         if( pbNewTerm ) *pbNewTerm = 1;
2132       }
2133     }else{
2134       /* The following could be done by calling fts5SegIterLoadNPos(). But
2135       ** this block is particularly performance critical, so equivalent
2136       ** code is inlined.  */
2137       int nSz;
2138       assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
2139       fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
2140       pIter->bDel = (nSz & 0x0001);
2141       pIter->nPos = nSz>>1;
2142       assert_nc( pIter->nPos>=0 );
2143     }
2144   }
2145 }
2146 
/*
** Exchange the values of a and b, both of which must be modifiable
** lvalues of type T. The expansion is a braced block that declares a
** local named "tmp", so neither argument may itself be named tmp, and
** each argument is evaluated more than once.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

/*
** Advance iOff past the varint stored at a[iOff]. Bytes are consumed
** while the high bit (0x80) is set, including the first byte without it,
** but never more than 9 bytes in total. iOff must be a modifiable lvalue;
** it is evaluated several times. The expansion is a braced block that
** declares a local named "iEnd", so iOff must not be a variable with that
** name.
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( (a[iOff++] & 0x80) && iOff<iEnd );       \
}
2153 
2154 /*
2155 ** Iterator pIter currently points to the first rowid in a doclist. This
2156 ** function sets the iterator up so that iterates in reverse order through
2157 ** the doclist.
2158 */
fts5SegIterReverse(Fts5Index * p,Fts5SegIter * pIter)2159 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
2160   Fts5DlidxIter *pDlidx = pIter->pDlidx;
2161   Fts5Data *pLast = 0;
2162   int pgnoLast = 0;
2163 
2164   if( pDlidx ){
2165     int iSegid = pIter->pSeg->iSegid;
2166     pgnoLast = fts5DlidxIterPgno(pDlidx);
2167     pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
2168   }else{
2169     Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */
2170 
2171     /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
2172     ** position-list content for the current rowid. Back it up so that it
2173     ** points to the start of the position-list size field. */
2174     int iPoslist;
2175     if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
2176       iPoslist = pIter->iTermLeafOffset;
2177     }else{
2178       iPoslist = 4;
2179     }
2180     fts5IndexSkipVarint(pLeaf->p, iPoslist);
2181     pIter->iLeafOffset = iPoslist;
2182 
2183     /* If this condition is true then the largest rowid for the current
2184     ** term may not be stored on the current page. So search forward to
2185     ** see where said rowid really is.  */
2186     if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
2187       int pgno;
2188       Fts5StructureSegment *pSeg = pIter->pSeg;
2189 
2190       /* The last rowid in the doclist may not be on the current page. Search
2191       ** forward to find the page containing the last rowid.  */
2192       for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
2193         i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
2194         Fts5Data *pNew = fts5LeafRead(p, iAbs);
2195         if( pNew ){
2196           int iRowid, bTermless;
2197           iRowid = fts5LeafFirstRowidOff(pNew);
2198           bTermless = fts5LeafIsTermless(pNew);
2199           if( iRowid ){
2200             SWAPVAL(Fts5Data*, pNew, pLast);
2201             pgnoLast = pgno;
2202           }
2203           fts5DataRelease(pNew);
2204           if( bTermless==0 ) break;
2205         }
2206       }
2207     }
2208   }
2209 
2210   /* If pLast is NULL at this point, then the last rowid for this doclist
2211   ** lies on the page currently indicated by the iterator. In this case
2212   ** pIter->iLeafOffset is already set to point to the position-list size
2213   ** field associated with the first relevant rowid on the page.
2214   **
2215   ** Or, if pLast is non-NULL, then it is the page that contains the last
2216   ** rowid. In this case configure the iterator so that it points to the
2217   ** first rowid on this page.
2218   */
2219   if( pLast ){
2220     int iOff;
2221     fts5DataRelease(pIter->pLeaf);
2222     pIter->pLeaf = pLast;
2223     pIter->iLeafPgno = pgnoLast;
2224     iOff = fts5LeafFirstRowidOff(pLast);
2225     if( iOff>pLast->szLeaf ){
2226       p->rc = FTS5_CORRUPT;
2227       return;
2228     }
2229     iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
2230     pIter->iLeafOffset = iOff;
2231 
2232     if( fts5LeafIsTermless(pLast) ){
2233       pIter->iEndofDoclist = pLast->nn+1;
2234     }else{
2235       pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
2236     }
2237   }
2238 
2239   fts5SegIterReverseInitPage(p, pIter);
2240 }
2241 
2242 /*
2243 ** Iterator pIter currently points to the first rowid of a doclist.
2244 ** There is a doclist-index associated with the final term on the current
2245 ** page. If the current term is the last term on the page, load the
2246 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
2247 */
fts5SegIterLoadDlidx(Fts5Index * p,Fts5SegIter * pIter)2248 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
2249   int iSeg = pIter->pSeg->iSegid;
2250   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2251   Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
2252 
2253   assert( pIter->flags & FTS5_SEGITER_ONETERM );
2254   assert( pIter->pDlidx==0 );
2255 
2256   /* Check if the current doclist ends on this page. If it does, return
2257   ** early without loading the doclist-index (as it belongs to a different
2258   ** term. */
2259   if( pIter->iTermLeafPgno==pIter->iLeafPgno
2260    && pIter->iEndofDoclist<pLeaf->szLeaf
2261   ){
2262     return;
2263   }
2264 
2265   pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
2266 }
2267 
2268 /*
2269 ** The iterator object passed as the second argument currently contains
2270 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
2271 ** function searches the leaf page for a term matching (pTerm/nTerm).
2272 **
2273 ** If the specified term is found on the page, then the iterator is left
2274 ** pointing to it. If argument bGe is zero and the term is not found,
2275 ** the iterator is left pointing at EOF.
2276 **
2277 ** If bGe is non-zero and the specified term is not found, then the
2278 ** iterator is left pointing to the smallest term in the segment that
2279 ** is larger than the specified term, even if this term is not on the
2280 ** current page.
2281 */
fts5LeafSeek(Fts5Index * p,int bGe,Fts5SegIter * pIter,const u8 * pTerm,int nTerm)2282 static void fts5LeafSeek(
2283   Fts5Index *p,                   /* Leave any error code here */
2284   int bGe,                        /* True for a >= search */
2285   Fts5SegIter *pIter,             /* Iterator to seek */
2286   const u8 *pTerm, int nTerm      /* Term to search for */
2287 ){
2288   u32 iOff;
2289   const u8 *a = pIter->pLeaf->p;
2290   u32 n = (u32)pIter->pLeaf->nn;
2291 
2292   u32 nMatch = 0;
2293   u32 nKeep = 0;
2294   u32 nNew = 0;
2295   u32 iTermOff;
2296   u32 iPgidx;                     /* Current offset in pgidx */
2297   int bEndOfPage = 0;
2298 
2299   assert( p->rc==SQLITE_OK );
2300 
2301   iPgidx = (u32)pIter->pLeaf->szLeaf;
2302   iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
2303   iOff = iTermOff;
2304   if( iOff>n ){
2305     p->rc = FTS5_CORRUPT;
2306     return;
2307   }
2308 
2309   while( 1 ){
2310 
2311     /* Figure out how many new bytes are in this term */
2312     fts5FastGetVarint32(a, iOff, nNew);
2313     if( nKeep<nMatch ){
2314       goto search_failed;
2315     }
2316 
2317     assert( nKeep>=nMatch );
2318     if( nKeep==nMatch ){
2319       u32 nCmp;
2320       u32 i;
2321       nCmp = (u32)MIN(nNew, nTerm-nMatch);
2322       for(i=0; i<nCmp; i++){
2323         if( a[iOff+i]!=pTerm[nMatch+i] ) break;
2324       }
2325       nMatch += i;
2326 
2327       if( (u32)nTerm==nMatch ){
2328         if( i==nNew ){
2329           goto search_success;
2330         }else{
2331           goto search_failed;
2332         }
2333       }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
2334         goto search_failed;
2335       }
2336     }
2337 
2338     if( iPgidx>=n ){
2339       bEndOfPage = 1;
2340       break;
2341     }
2342 
2343     iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
2344     iTermOff += nKeep;
2345     iOff = iTermOff;
2346 
2347     if( iOff>=n ){
2348       p->rc = FTS5_CORRUPT;
2349       return;
2350     }
2351 
2352     /* Read the nKeep field of the next term. */
2353     fts5FastGetVarint32(a, iOff, nKeep);
2354   }
2355 
2356  search_failed:
2357   if( bGe==0 ){
2358     fts5DataRelease(pIter->pLeaf);
2359     pIter->pLeaf = 0;
2360     return;
2361   }else if( bEndOfPage ){
2362     do {
2363       fts5SegIterNextPage(p, pIter);
2364       if( pIter->pLeaf==0 ) return;
2365       a = pIter->pLeaf->p;
2366       if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
2367         iPgidx = (u32)pIter->pLeaf->szLeaf;
2368         iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
2369         if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
2370           p->rc = FTS5_CORRUPT;
2371           return;
2372         }else{
2373           nKeep = 0;
2374           iTermOff = iOff;
2375           n = (u32)pIter->pLeaf->nn;
2376           iOff += fts5GetVarint32(&a[iOff], nNew);
2377           break;
2378         }
2379       }
2380     }while( 1 );
2381   }
2382 
2383  search_success:
2384   if( (i64)iOff+nNew>n || nNew<1 ){
2385     p->rc = FTS5_CORRUPT;
2386     return;
2387   }
2388   pIter->iLeafOffset = iOff + nNew;
2389   pIter->iTermLeafOffset = pIter->iLeafOffset;
2390   pIter->iTermLeafPgno = pIter->iLeafPgno;
2391 
2392   fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
2393   fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
2394 
2395   if( iPgidx>=n ){
2396     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
2397   }else{
2398     int nExtra;
2399     iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
2400     pIter->iEndofDoclist = iTermOff + nExtra;
2401   }
2402   pIter->iPgidxOff = iPgidx;
2403 
2404   fts5SegIterLoadRowid(p, pIter);
2405   fts5SegIterLoadNPos(p, pIter);
2406 }
2407 
fts5IdxSelectStmt(Fts5Index * p)2408 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
2409   if( p->pIdxSelect==0 ){
2410     Fts5Config *pConfig = p->pConfig;
2411     fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
2412           "SELECT pgno FROM '%q'.'%q_idx' WHERE "
2413           "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
2414           pConfig->zDb, pConfig->zName
2415     ));
2416   }
2417   return p->pIdxSelect;
2418 }
2419 
2420 /*
2421 ** Initialize the object pIter to point to term pTerm/nTerm within segment
2422 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
2423 **
2424 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2425 ** an error has already occurred when this function is called, it is a no-op.
2426 */
fts5SegIterSeekInit(Fts5Index * p,const u8 * pTerm,int nTerm,int flags,Fts5StructureSegment * pSeg,Fts5SegIter * pIter)2427 static void fts5SegIterSeekInit(
2428   Fts5Index *p,                   /* FTS5 backend */
2429   const u8 *pTerm, int nTerm,     /* Term to seek to */
2430   int flags,                      /* Mask of FTS5INDEX_XXX flags */
2431   Fts5StructureSegment *pSeg,     /* Description of segment */
2432   Fts5SegIter *pIter              /* Object to populate */
2433 ){
2434   int iPg = 1;
2435   int bGe = (flags & FTS5INDEX_QUERY_SCAN);
2436   int bDlidx = 0;                 /* True if there is a doclist-index */
2437   sqlite3_stmt *pIdxSelect = 0;
2438 
2439   assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
2440   assert( pTerm && nTerm );
2441   memset(pIter, 0, sizeof(*pIter));
2442   pIter->pSeg = pSeg;
2443 
2444   /* This block sets stack variable iPg to the leaf page number that may
2445   ** contain term (pTerm/nTerm), if it is present in the segment. */
2446   pIdxSelect = fts5IdxSelectStmt(p);
2447   if( p->rc ) return;
2448   sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
2449   sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
2450   if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
2451     i64 val = sqlite3_column_int(pIdxSelect, 0);
2452     iPg = (int)(val>>1);
2453     bDlidx = (val & 0x0001);
2454   }
2455   p->rc = sqlite3_reset(pIdxSelect);
2456   sqlite3_bind_null(pIdxSelect, 2);
2457 
2458   if( iPg<pSeg->pgnoFirst ){
2459     iPg = pSeg->pgnoFirst;
2460     bDlidx = 0;
2461   }
2462 
2463   pIter->iLeafPgno = iPg - 1;
2464   fts5SegIterNextPage(p, pIter);
2465 
2466   if( pIter->pLeaf ){
2467     fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
2468   }
2469 
2470   if( p->rc==SQLITE_OK && bGe==0 ){
2471     pIter->flags |= FTS5_SEGITER_ONETERM;
2472     if( pIter->pLeaf ){
2473       if( flags & FTS5INDEX_QUERY_DESC ){
2474         pIter->flags |= FTS5_SEGITER_REVERSE;
2475       }
2476       if( bDlidx ){
2477         fts5SegIterLoadDlidx(p, pIter);
2478       }
2479       if( flags & FTS5INDEX_QUERY_DESC ){
2480         fts5SegIterReverse(p, pIter);
2481       }
2482     }
2483   }
2484 
2485   fts5SegIterSetNext(p, pIter);
2486 
2487   /* Either:
2488   **
2489   **   1) an error has occurred, or
2490   **   2) the iterator points to EOF, or
2491   **   3) the iterator points to an entry with term (pTerm/nTerm), or
2492   **   4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
2493   **      to an entry with a term greater than or equal to (pTerm/nTerm).
2494   */
2495   assert_nc( p->rc!=SQLITE_OK                                       /* 1 */
2496    || pIter->pLeaf==0                                               /* 2 */
2497    || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0          /* 3 */
2498    || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0)  /* 4 */
2499   );
2500 }
2501 
2502 /*
2503 ** Initialize the object pIter to point to term pTerm/nTerm within the
2504 ** in-memory hash table. If there is no such term in the hash-table, the
2505 ** iterator is set to EOF.
2506 **
2507 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
2508 ** an error has already occurred when this function is called, it is a no-op.
2509 */
static void fts5SegIterHashInit(
  Fts5Index *p,                   /* FTS5 backend */
  const u8 *pTerm, int nTerm,     /* Term to seek to */
  int flags,                      /* Mask of FTS5INDEX_XXX flags */
  Fts5SegIter *pIter              /* Object to populate */
){
  Fts5Data *pData = 0;            /* Leaf object wrapping the hash doclist */
  const u8 *zKey = 0;             /* Term the iterator is positioned on */
  int nKey = 0;                   /* Size of zKey in bytes */
  int nDoclist = 0;               /* Size of the doclist in bytes */
  int bScan = (pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN));

  assert( p->pHash );
  assert( p->rc==SQLITE_OK );

  if( !bScan ){
    /* Lookup of a single term. sqlite3Fts5HashQuery() is asked to reserve
    ** sizeof(Fts5Data) bytes in front of the doclist, so the data pointer
    ** is set to the bytes immediately following that header. */
    pIter->flags |= FTS5_SEGITER_ONETERM;
    zKey = pTerm;
    nKey = nTerm;
    p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
        (const char*)pTerm, nTerm, (void**)&pData, &nDoclist
    );
    if( pData ){
      pData->p = (u8*)&pData[1];
    }
  }else{
    /* Scan. Start a hash-table scan and wrap the doclist of the first
    ** entry (if any) in a freshly allocated Fts5Data header. */
    const u8 *pDoclist = 0;

    p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
    sqlite3Fts5HashScanEntry(
        p->pHash, (const char**)&zKey, &pDoclist, &nDoclist
    );
    if( zKey ){
      nKey = (int)strlen((const char*)zKey);
    }
    if( pDoclist ){
      pData = fts5IdxMalloc(p, sizeof(Fts5Data));
      if( pData ){
        pData->p = (u8*)pDoclist;
      }
    }
  }

  if( pData ){
    sqlite3Fts5BufferSet(&p->rc, &pIter->term, nKey, zKey);
    pData->szLeaf = nDoclist;
    pData->nn = pData->szLeaf;
    pIter->pLeaf = pData;

    /* The doclist begins with the first rowid, stored as a varint. */
    pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
    pIter->iEndofDoclist = pData->nn;

    if( (flags & FTS5INDEX_QUERY_DESC)==0 ){
      fts5SegIterLoadNPos(p, pIter);
    }else{
      pIter->flags |= FTS5_SEGITER_REVERSE;
      fts5SegIterReverseInitPage(p, pIter);
    }
  }

  fts5SegIterSetNext(p, pIter);
}
2565 
2566 /*
2567 ** Zero the iterator passed as the only argument.
2568 */
fts5SegIterClear(Fts5SegIter * pIter)2569 static void fts5SegIterClear(Fts5SegIter *pIter){
2570   fts5BufferFree(&pIter->term);
2571   fts5DataRelease(pIter->pLeaf);
2572   fts5DataRelease(pIter->pNextLeaf);
2573   fts5DlidxIterFree(pIter->pDlidx);
2574   sqlite3_free(pIter->aRowidOffset);
2575   memset(pIter, 0, sizeof(Fts5SegIter));
2576 }
2577 
2578 #ifdef SQLITE_DEBUG
2579 
2580 /*
2581 ** This function is used as part of the big assert() procedure implemented by
2582 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
2583 ** in *pRes is the correct result of comparing the current positions of the
2584 ** two iterators.
2585 */
fts5AssertComparisonResult(Fts5Iter * pIter,Fts5SegIter * p1,Fts5SegIter * p2,Fts5CResult * pRes)2586 static void fts5AssertComparisonResult(
2587   Fts5Iter *pIter,
2588   Fts5SegIter *p1,
2589   Fts5SegIter *p2,
2590   Fts5CResult *pRes
2591 ){
2592   int i1 = p1 - pIter->aSeg;
2593   int i2 = p2 - pIter->aSeg;
2594 
2595   if( p1->pLeaf || p2->pLeaf ){
2596     if( p1->pLeaf==0 ){
2597       assert( pRes->iFirst==i2 );
2598     }else if( p2->pLeaf==0 ){
2599       assert( pRes->iFirst==i1 );
2600     }else{
2601       int nMin = MIN(p1->term.n, p2->term.n);
2602       int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
2603       if( res==0 ) res = p1->term.n - p2->term.n;
2604 
2605       if( res==0 ){
2606         assert( pRes->bTermEq==1 );
2607         assert( p1->iRowid!=p2->iRowid );
2608         res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
2609       }else{
2610         assert( pRes->bTermEq==0 );
2611       }
2612 
2613       if( res<0 ){
2614         assert( pRes->iFirst==i1 );
2615       }else{
2616         assert( pRes->iFirst==i2 );
2617       }
2618     }
2619   }
2620 }
2621 
2622 /*
2623 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
2624 ** is compiled. In that case, this function is essentially an assert()
2625 ** statement used to verify that the contents of the pIter->aFirst[] array
2626 ** are correct.
2627 */
fts5AssertMultiIterSetup(Fts5Index * p,Fts5Iter * pIter)2628 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
2629   if( p->rc==SQLITE_OK ){
2630     Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2631     int i;
2632 
2633     assert( (pFirst->pLeaf==0)==pIter->base.bEof );
2634 
2635     /* Check that pIter->iSwitchRowid is set correctly. */
2636     for(i=0; i<pIter->nSeg; i++){
2637       Fts5SegIter *p1 = &pIter->aSeg[i];
2638       assert( p1==pFirst
2639            || p1->pLeaf==0
2640            || fts5BufferCompare(&pFirst->term, &p1->term)
2641            || p1->iRowid==pIter->iSwitchRowid
2642            || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
2643       );
2644     }
2645 
2646     for(i=0; i<pIter->nSeg; i+=2){
2647       Fts5SegIter *p1 = &pIter->aSeg[i];
2648       Fts5SegIter *p2 = &pIter->aSeg[i+1];
2649       Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
2650       fts5AssertComparisonResult(pIter, p1, p2, pRes);
2651     }
2652 
2653     for(i=1; i<(pIter->nSeg / 2); i+=2){
2654       Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
2655       Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
2656       Fts5CResult *pRes = &pIter->aFirst[i];
2657       fts5AssertComparisonResult(pIter, p1, p2, pRes);
2658     }
2659   }
2660 }
2661 #else
2662 # define fts5AssertMultiIterSetup(x,y)
2663 #endif
2664 
2665 /*
2666 ** Do the comparison necessary to populate pIter->aFirst[iOut].
2667 **
2668 ** If the returned value is non-zero, then it is the index of an entry
2669 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
2670 ** to a key that is a duplicate of another, higher priority,
2671 ** segment-iterator in the pSeg->aSeg[] array.
2672 */
fts5MultiIterDoCompare(Fts5Iter * pIter,int iOut)2673 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
2674   int i1;                         /* Index of left-hand Fts5SegIter */
2675   int i2;                         /* Index of right-hand Fts5SegIter */
2676   int iRes;
2677   Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
2678   Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
2679   Fts5CResult *pRes = &pIter->aFirst[iOut];
2680 
2681   assert( iOut<pIter->nSeg && iOut>0 );
2682   assert( pIter->bRev==0 || pIter->bRev==1 );
2683 
2684   if( iOut>=(pIter->nSeg/2) ){
2685     i1 = (iOut - pIter->nSeg/2) * 2;
2686     i2 = i1 + 1;
2687   }else{
2688     i1 = pIter->aFirst[iOut*2].iFirst;
2689     i2 = pIter->aFirst[iOut*2+1].iFirst;
2690   }
2691   p1 = &pIter->aSeg[i1];
2692   p2 = &pIter->aSeg[i2];
2693 
2694   pRes->bTermEq = 0;
2695   if( p1->pLeaf==0 ){           /* If p1 is at EOF */
2696     iRes = i2;
2697   }else if( p2->pLeaf==0 ){     /* If p2 is at EOF */
2698     iRes = i1;
2699   }else{
2700     int res = fts5BufferCompare(&p1->term, &p2->term);
2701     if( res==0 ){
2702       assert_nc( i2>i1 );
2703       assert_nc( i2!=0 );
2704       pRes->bTermEq = 1;
2705       if( p1->iRowid==p2->iRowid ){
2706         p1->bDel = p2->bDel;
2707         return i2;
2708       }
2709       res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
2710     }
2711     assert( res!=0 );
2712     if( res<0 ){
2713       iRes = i1;
2714     }else{
2715       iRes = i2;
2716     }
2717   }
2718 
2719   pRes->iFirst = (u16)iRes;
2720   return 0;
2721 }
2722 
2723 /*
2724 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
2725 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
2726 */
fts5SegIterGotoPage(Fts5Index * p,Fts5SegIter * pIter,int iLeafPgno)2727 static void fts5SegIterGotoPage(
2728   Fts5Index *p,                   /* FTS5 backend object */
2729   Fts5SegIter *pIter,             /* Iterator to advance */
2730   int iLeafPgno
2731 ){
2732   assert( iLeafPgno>pIter->iLeafPgno );
2733 
2734   if( iLeafPgno>pIter->pSeg->pgnoLast ){
2735     p->rc = FTS5_CORRUPT;
2736   }else{
2737     fts5DataRelease(pIter->pNextLeaf);
2738     pIter->pNextLeaf = 0;
2739     pIter->iLeafPgno = iLeafPgno-1;
2740     fts5SegIterNextPage(p, pIter);
2741     assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
2742 
2743     if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){
2744       int iOff;
2745       u8 *a = pIter->pLeaf->p;
2746       int n = pIter->pLeaf->szLeaf;
2747 
2748       iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
2749       if( iOff<4 || iOff>=n ){
2750         p->rc = FTS5_CORRUPT;
2751       }else{
2752         iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
2753         pIter->iLeafOffset = iOff;
2754         fts5SegIterLoadNPos(p, pIter);
2755       }
2756     }
2757   }
2758 }
2759 
2760 /*
2761 ** Advance the iterator passed as the second argument until it is at or
2762 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
2763 ** always advanced at least once.
2764 */
fts5SegIterNextFrom(Fts5Index * p,Fts5SegIter * pIter,i64 iMatch)2765 static void fts5SegIterNextFrom(
2766   Fts5Index *p,                   /* FTS5 backend object */
2767   Fts5SegIter *pIter,             /* Iterator to advance */
2768   i64 iMatch                      /* Advance iterator at least this far */
2769 ){
2770   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
2771   Fts5DlidxIter *pDlidx = pIter->pDlidx;
2772   int iLeafPgno = pIter->iLeafPgno;
2773   int bMove = 1;
2774 
2775   assert( pIter->flags & FTS5_SEGITER_ONETERM );
2776   assert( pIter->pDlidx );
2777   assert( pIter->pLeaf );
2778 
2779   if( bRev==0 ){
2780     while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
2781       iLeafPgno = fts5DlidxIterPgno(pDlidx);
2782       fts5DlidxIterNext(p, pDlidx);
2783     }
2784     assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
2785     if( iLeafPgno>pIter->iLeafPgno ){
2786       fts5SegIterGotoPage(p, pIter, iLeafPgno);
2787       bMove = 0;
2788     }
2789   }else{
2790     assert( pIter->pNextLeaf==0 );
2791     assert( iMatch<pIter->iRowid );
2792     while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
2793       fts5DlidxIterPrev(p, pDlidx);
2794     }
2795     iLeafPgno = fts5DlidxIterPgno(pDlidx);
2796 
2797     assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
2798 
2799     if( iLeafPgno<pIter->iLeafPgno ){
2800       pIter->iLeafPgno = iLeafPgno+1;
2801       fts5SegIterReverseNewPage(p, pIter);
2802       bMove = 0;
2803     }
2804   }
2805 
2806   do{
2807     if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
2808     if( pIter->pLeaf==0 ) break;
2809     if( bRev==0 && pIter->iRowid>=iMatch ) break;
2810     if( bRev!=0 && pIter->iRowid<=iMatch ) break;
2811     bMove = 1;
2812   }while( p->rc==SQLITE_OK );
2813 }
2814 
2815 
2816 /*
2817 ** Free the iterator object passed as the second argument.
2818 */
fts5MultiIterFree(Fts5Iter * pIter)2819 static void fts5MultiIterFree(Fts5Iter *pIter){
2820   if( pIter ){
2821     int i;
2822     for(i=0; i<pIter->nSeg; i++){
2823       fts5SegIterClear(&pIter->aSeg[i]);
2824     }
2825     fts5BufferFree(&pIter->poslist);
2826     sqlite3_free(pIter);
2827   }
2828 }
2829 
/*
** Sub-iterator iChanged of pIter has just been advanced. Recompute the
** entries of pIter->aFirst[] that depend on it, starting with the entry
** directly above aSeg[iChanged] and halving the index each step until it
** drops below iMinset or an error is recorded in p->rc.
**
** If fts5MultiIterDoCompare() reports a duplicate entry, the sub-iterator
** holding the duplicate is advanced with xNext() and the walk restarts
** from the aFirst[] entry directly above that sub-iterator.
*/
static void fts5MultiIterAdvanced(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
  int iChanged,                   /* Index of sub-iterator just advanced */
  int iMinset                     /* Minimum entry in aFirst[] to set */
){
  int iNode = (pIter->nSeg+iChanged)/2;
  while( iNode>=iMinset && p->rc==SQLITE_OK ){
    int iDup = fts5MultiIterDoCompare(pIter, iNode);
    if( iDup!=0 ){
      Fts5SegIter *pDup = &pIter->aSeg[iDup];
      assert( p->rc==SQLITE_OK );
      pDup->xNext(p, pDup, 0);
      iNode = pIter->nSeg + iDup;
    }
    iNode = iNode/2;
  }
}
2847 
2848 /*
2849 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
2850 ** points to the same term though - just a different rowid. This function
2851 ** attempts to update the contents of the pIter->aFirst[] accordingly.
2852 ** If it does so successfully, 0 is returned. Otherwise 1.
2853 **
2854 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
2855 ** on the iterator instead. That function does the same as this one, except
2856 ** that it deals with more complicated cases as well.
2857 */
fts5MultiIterAdvanceRowid(Fts5Iter * pIter,int iChanged,Fts5SegIter ** ppFirst)2858 static int fts5MultiIterAdvanceRowid(
2859   Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
2860   int iChanged,                   /* Index of sub-iterator just advanced */
2861   Fts5SegIter **ppFirst
2862 ){
2863   Fts5SegIter *pNew = &pIter->aSeg[iChanged];
2864 
2865   if( pNew->iRowid==pIter->iSwitchRowid
2866    || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
2867   ){
2868     int i;
2869     Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
2870     pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
2871     for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
2872       Fts5CResult *pRes = &pIter->aFirst[i];
2873 
2874       assert( pNew->pLeaf );
2875       assert( pRes->bTermEq==0 || pOther->pLeaf );
2876 
2877       if( pRes->bTermEq ){
2878         if( pNew->iRowid==pOther->iRowid ){
2879           return 1;
2880         }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
2881           pIter->iSwitchRowid = pOther->iRowid;
2882           pNew = pOther;
2883         }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
2884           pIter->iSwitchRowid = pOther->iRowid;
2885         }
2886       }
2887       pRes->iFirst = (u16)(pNew - pIter->aSeg);
2888       if( i==1 ) break;
2889 
2890       pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
2891     }
2892   }
2893 
2894   *ppFirst = pNew;
2895   return 0;
2896 }
2897 
2898 /*
2899 ** Set the pIter->bEof variable based on the state of the sub-iterators.
2900 */
fts5MultiIterSetEof(Fts5Iter * pIter)2901 static void fts5MultiIterSetEof(Fts5Iter *pIter){
2902   Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
2903   pIter->base.bEof = pSeg->pLeaf==0;
2904   pIter->iSwitchRowid = pSeg->iRowid;
2905 }
2906 
2907 /*
2908 ** Move the iterator to the next entry.
2909 **
2910 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
2911 ** considered an error if the iterator reaches EOF, or if it is already at
2912 ** EOF when this function is called.
2913 */
fts5MultiIterNext(Fts5Index * p,Fts5Iter * pIter,int bFrom,i64 iFrom)2914 static void fts5MultiIterNext(
2915   Fts5Index *p,
2916   Fts5Iter *pIter,
2917   int bFrom,                      /* True if argument iFrom is valid */
2918   i64 iFrom                       /* Advance at least as far as this */
2919 ){
2920   int bUseFrom = bFrom;
2921   assert( pIter->base.bEof==0 );
2922   while( p->rc==SQLITE_OK ){
2923     int iFirst = pIter->aFirst[1].iFirst;
2924     int bNewTerm = 0;
2925     Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
2926     assert( p->rc==SQLITE_OK );
2927     if( bUseFrom && pSeg->pDlidx ){
2928       fts5SegIterNextFrom(p, pSeg, iFrom);
2929     }else{
2930       pSeg->xNext(p, pSeg, &bNewTerm);
2931     }
2932 
2933     if( pSeg->pLeaf==0 || bNewTerm
2934      || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
2935     ){
2936       fts5MultiIterAdvanced(p, pIter, iFirst, 1);
2937       fts5MultiIterSetEof(pIter);
2938       pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
2939       if( pSeg->pLeaf==0 ) return;
2940     }
2941 
2942     fts5AssertMultiIterSetup(p, pIter);
2943     assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
2944     if( pIter->bSkipEmpty==0 || pSeg->nPos ){
2945       pIter->xSetOutputs(pIter, pSeg);
2946       return;
2947     }
2948     bUseFrom = 0;
2949   }
2950 }
2951 
/*
** Variant of fts5MultiIterNext() for iterators that have bSkipEmpty set.
** The iterator is stepped repeatedly until fts5MultiIterIsEmpty() returns
** false. The xSetOutputs callback is not invoked.
**
** *pbNewTerm is set to 0 on entry and to 1 if, during any step, the
** pIter->aFirst[] array had to be recomputed with fts5MultiIterAdvanced()
** (i.e. the iterator *might* now point to a new term). This function is a
** no-op, and *pbNewTerm is left untouched, if p->rc is already set.
*/
static void fts5MultiIterNext2(
  Fts5Index *p,
  Fts5Iter *pIter,
  int *pbNewTerm                  /* OUT: True if *might* be new term */
){
  assert( pIter->bSkipEmpty );
  if( p->rc!=SQLITE_OK ) return;

  *pbNewTerm = 0;
  do{
    int iFirst = pIter->aFirst[1].iFirst;
    Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
    int bNewTerm = 0;
    int bRebuild;                 /* True to recompute aFirst[] in full */

    assert( p->rc==SQLITE_OK );
    pSeg->xNext(p, pSeg, &bNewTerm);

    bRebuild = (pSeg->pLeaf==0 || bNewTerm);
    if( !bRebuild ){
      bRebuild = fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg);
    }
    if( bRebuild ){
      fts5MultiIterAdvanced(p, pIter, iFirst, 1);
      fts5MultiIterSetEof(pIter);
      *pbNewTerm = 1;
    }
    fts5AssertMultiIterSetup(p, pIter);
  }while( fts5MultiIterIsEmpty(p, pIter) );
}
2979 
/*
** xSetOutputs callback that does nothing at all. Installed as the initial
** callback by fts5MultiIterAlloc().
*/
static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
  UNUSED_PARAM(pUnused1);
  UNUSED_PARAM(pUnused2);
}
2983 
fts5MultiIterAlloc(Fts5Index * p,int nSeg)2984 static Fts5Iter *fts5MultiIterAlloc(
2985   Fts5Index *p,                   /* FTS5 backend to iterate within */
2986   int nSeg
2987 ){
2988   Fts5Iter *pNew;
2989   int nSlot;                      /* Power of two >= nSeg */
2990 
2991   for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
2992   pNew = fts5IdxMalloc(p,
2993       sizeof(Fts5Iter) +                  /* pNew */
2994       sizeof(Fts5SegIter) * (nSlot-1) +   /* pNew->aSeg[] */
2995       sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
2996   );
2997   if( pNew ){
2998     pNew->nSeg = nSlot;
2999     pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
3000     pNew->pIndex = p;
3001     pNew->xSetOutputs = fts5IterSetOutputs_Noop;
3002   }
3003   return pNew;
3004 }
3005 
/*
** xChunk callback for fts5ChunkIterate(). The context pointer is an
** Fts5Buffer; each non-empty chunk is appended to it verbatim. The buffer
** must already be large enough, as the "safe" append does not grow it.
*/
static void fts5PoslistCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  Fts5Buffer *pBuf = (Fts5Buffer*)pContext;
  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;
  fts5BufferSafeAppendBlob(pBuf, pChunk, nChunk);
}
3017 
3018 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
3019 struct PoslistCallbackCtx {
3020   Fts5Buffer *pBuf;               /* Append to this buffer */
3021   Fts5Colset *pColset;            /* Restrict matches to this column */
3022   int eState;                     /* See above */
3023 };
3024 
3025 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
3026 struct PoslistOffsetsCtx {
3027   Fts5Buffer *pBuf;               /* Append to this buffer */
3028   Fts5Colset *pColset;            /* Restrict matches to this column */
3029   int iRead;
3030   int iWrite;
3031 };
3032 
3033 /*
3034 ** TODO: Make this more efficient!
3035 */
fts5IndexColsetTest(Fts5Colset * pColset,int iCol)3036 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
3037   int i;
3038   for(i=0; i<pColset->nCol; i++){
3039     if( pColset->aiCol[i]==iCol ) return 1;
3040   }
3041   return 0;
3042 }
3043 
/*
** xChunk callback for fts5ChunkIterate(). The context pointer is a
** PoslistOffsetsCtx. The chunk is read as a series of varints, each of
** which is the difference from the previous value plus 2. Every decoded
** value that is a member of pCtx->pColset is re-encoded, relative to the
** previous value written, and appended to pCtx->pBuf.
*/
static void fts5PoslistOffsetsCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
  int iOff;                       /* Current offset within pChunk[] */

  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );

  for(iOff=0; iOff<nChunk; ){
    int iCol;
    iOff += fts5GetVarint32(&pChunk[iOff], iCol);
    iCol += pCtx->iRead - 2;
    pCtx->iRead = iCol;
    if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
      fts5BufferSafeAppendVarint(pCtx->pBuf, iCol + 2 - pCtx->iWrite);
      pCtx->iWrite = iCol;
    }
  }
}
3066 
/*
** xChunk callback for fts5ChunkIterate(). The context pointer is a
** PoslistCallbackCtx. The chunk is part of a position list in which the
** hits for each column other than the first are introduced by a 0x01 byte
** followed by a varint column number. Only the parts of the chunk that
** belong to columns in pCtx->pColset are appended to pCtx->pBuf.
**
** pCtx->eState carries the filter state from one chunk to the next:
** non-zero means "copying". The value 2 means the previous chunk ended
** directly after a 0x01 byte, so this chunk starts with a column number.
*/
static void fts5PoslistFilterCallback(
  Fts5Index *pUnused,
  void *pContext,
  const u8 *pChunk, int nChunk
){
  PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
  int iOff = 0;                   /* Current offset within pChunk[] */
  int iCopy = 0;                  /* Start of data not yet copied */

  UNUSED_PARAM(pUnused);
  assert_nc( nChunk>=0 );
  if( nChunk<=0 ) return;

  if( pCtx->eState==2 ){
    /* Finish the column header that was split across two chunks. The 0x01
    ** byte was not copied by the previous call, so emit it now. */
    int iCol;
    fts5FastGetVarint32(pChunk, iOff, iCol);
    if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
      pCtx->eState = 1;
      fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
    }else{
      pCtx->eState = 0;
    }
  }

  while( 1 ){
    int iCol;

    /* Search through to find the next varint with value 1. This is the
    ** start of the next columns hits. */
    while( iOff<nChunk && pChunk[iOff]!=0x01 ){
      while( pChunk[iOff] & 0x80 ) iOff++;
      iOff++;
    }
    if( pCtx->eState ){
      fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iCopy], iOff-iCopy);
    }
    if( iOff>=nChunk ) break;

    /* pChunk[iOff] is a 0x01 byte. Step over it. */
    iCopy = iOff++;
    if( iOff>=nChunk ){
      /* The column number is in the next chunk */
      pCtx->eState = 2;
      break;
    }

    fts5FastGetVarint32(pChunk, iOff, iCol);
    pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
    if( pCtx->eState ){
      fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iCopy], iOff-iCopy);
      iCopy = iOff;
    }
    if( iOff>=nChunk ) break;
  }
}
3118 
/*
** Invoke callback xChunk once for each contiguous piece of the position
** list of the entry that segment iterator pSeg currently points to. The
** list is pSeg->nPos bytes in size. The first piece starts at offset
** pSeg->iLeafOffset of the current leaf; if the list continues past the
** end of that leaf, further pieces are read from byte offset 4 of each
** following leaf page.
**
** Unless pSeg is a reverse iterator, the page that immediately follows the
** current leaf is not released after use but stored in pSeg->pNextLeaf.
**
** p->rc is set to FTS5_CORRUPT if the list runs past the first piece but
** the iterator has no segment (pSeg->pSeg==0) to read more pages from.
*/
static void fts5ChunkIterate(
  Fts5Index *p,                   /* Index object */
  Fts5SegIter *pSeg,              /* Poslist of this iterator */
  void *pCtx,                     /* Context pointer for xChunk callback */
  void (*xChunk)(Fts5Index*, void*, const u8*, int)
){
  int nLeft = pSeg->nPos;         /* Number of bytes still to come */
  Fts5Data *pPage = 0;            /* Extra leaf page currently loaded */
  u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
  int nChunk = MIN(nLeft, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
  int pgno = pSeg->iLeafPgno;
  int pgnoSave = ((pSeg->flags & FTS5_SEGITER_REVERSE)==0) ? pgno+1 : 0;

  /* This function does not work with detail=none databases. */
  assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );

  for(;;){
    xChunk(p, pCtx, pChunk, nChunk);
    nLeft -= nChunk;
    fts5DataRelease(pPage);
    if( nLeft<=0 ) break;
    if( pSeg->pSeg==0 ){
      p->rc = FTS5_CORRUPT;
      return;
    }

    pgno++;
    pPage = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
    if( pPage==0 ) break;
    pChunk = &pPage->p[4];
    nChunk = MIN(nLeft, pPage->szLeaf - 4);
    if( pgno==pgnoSave ){
      /* Hand the page over to the iterator instead of releasing it. */
      assert( pSeg->pNextLeaf==0 );
      pSeg->pNextLeaf = pPage;
      pPage = 0;
    }
  }
}
3162 
3163 /*
3164 ** Iterator pIter currently points to a valid entry (not EOF). This
3165 ** function appends the position list data for the current entry to
3166 ** buffer pBuf. It does not make a copy of the position-list size
3167 ** field.
3168 */
fts5SegiterPoslist(Fts5Index * p,Fts5SegIter * pSeg,Fts5Colset * pColset,Fts5Buffer * pBuf)3169 static void fts5SegiterPoslist(
3170   Fts5Index *p,
3171   Fts5SegIter *pSeg,
3172   Fts5Colset *pColset,
3173   Fts5Buffer *pBuf
3174 ){
3175   assert( pBuf!=0 );
3176   assert( pSeg!=0 );
3177   if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
3178     assert( pBuf->p!=0 );
3179     assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
3180     memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
3181     if( pColset==0 ){
3182       fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
3183     }else{
3184       if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
3185         PoslistCallbackCtx sCtx;
3186         sCtx.pBuf = pBuf;
3187         sCtx.pColset = pColset;
3188         sCtx.eState = fts5IndexColsetTest(pColset, 0);
3189         assert( sCtx.eState==0 || sCtx.eState==1 );
3190         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
3191       }else{
3192         PoslistOffsetsCtx sCtx;
3193         memset(&sCtx, 0, sizeof(sCtx));
3194         sCtx.pBuf = pBuf;
3195         sCtx.pColset = pColset;
3196         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
3197       }
3198     }
3199   }
3200 }
3201 
3202 /*
3203 ** Parameter pPos points to a buffer containing a position list, size nPos.
3204 ** This function filters it according to pColset (which must be non-NULL)
3205 ** and sets pIter->base.pData/nData to point to the new position list.
3206 ** If memory is required for the new position list, use buffer pIter->poslist.
3207 ** Or, if the new position list is a contiguous subset of the input, set
3208 ** pIter->base.pData/nData to point directly to it.
3209 **
3210 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
3211 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
3212 ** before returning.
3213 */
fts5IndexExtractColset(int * pRc,Fts5Colset * pColset,const u8 * pPos,int nPos,Fts5Iter * pIter)3214 static void fts5IndexExtractColset(
3215   int *pRc,
3216   Fts5Colset *pColset,            /* Colset to filter on */
3217   const u8 *pPos, int nPos,       /* Position list */
3218   Fts5Iter *pIter
3219 ){
3220   if( *pRc==SQLITE_OK ){
3221     const u8 *p = pPos;
3222     const u8 *aCopy = p;
3223     const u8 *pEnd = &p[nPos];    /* One byte past end of position list */
3224     int i = 0;
3225     int iCurrent = 0;
3226 
3227     if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
3228       return;
3229     }
3230 
3231     while( 1 ){
3232       while( pColset->aiCol[i]<iCurrent ){
3233         i++;
3234         if( i==pColset->nCol ){
3235           pIter->base.pData = pIter->poslist.p;
3236           pIter->base.nData = pIter->poslist.n;
3237           return;
3238         }
3239       }
3240 
3241       /* Advance pointer p until it points to pEnd or an 0x01 byte that is
3242       ** not part of a varint */
3243       while( p<pEnd && *p!=0x01 ){
3244         while( *p++ & 0x80 );
3245       }
3246 
3247       if( pColset->aiCol[i]==iCurrent ){
3248         if( pColset->nCol==1 ){
3249           pIter->base.pData = aCopy;
3250           pIter->base.nData = p-aCopy;
3251           return;
3252         }
3253         fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
3254       }
3255       if( p>=pEnd ){
3256         pIter->base.pData = pIter->poslist.p;
3257         pIter->base.nData = pIter->poslist.n;
3258         return;
3259       }
3260       aCopy = p++;
3261       iCurrent = *p++;
3262       if( iCurrent & 0x80 ){
3263         p--;
3264         p += fts5GetVarint32(p, iCurrent);
3265       }
3266     }
3267   }
3268 
3269 }
3270 
3271 /*
3272 ** xSetOutputs callback used by detail=none tables.
3273 */
fts5IterSetOutputs_None(Fts5Iter * pIter,Fts5SegIter * pSeg)3274 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
3275   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
3276   pIter->base.iRowid = pSeg->iRowid;
3277   pIter->base.nData = pSeg->nPos;
3278 }
3279 
3280 /*
3281 ** xSetOutputs callback used by detail=full and detail=col tables when no
3282 ** column filters are specified.
3283 */
fts5IterSetOutputs_Nocolset(Fts5Iter * pIter,Fts5SegIter * pSeg)3284 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3285   pIter->base.iRowid = pSeg->iRowid;
3286   pIter->base.nData = pSeg->nPos;
3287 
3288   assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
3289   assert( pIter->pColset==0 );
3290 
3291   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3292     /* All data is stored on the current page. Populate the output
3293     ** variables to point into the body of the page object. */
3294     pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3295   }else{
3296     /* The data is distributed over two or more pages. Copy it into the
3297     ** Fts5Iter.poslist buffer and then set the output pointer to point
3298     ** to this buffer.  */
3299     fts5BufferZero(&pIter->poslist);
3300     fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
3301     pIter->base.pData = pIter->poslist.p;
3302   }
3303 }
3304 
3305 /*
3306 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
3307 ** against no columns at all).
3308 */
fts5IterSetOutputs_ZeroColset(Fts5Iter * pIter,Fts5SegIter * pSeg)3309 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
3310   UNUSED_PARAM(pSeg);
3311   pIter->base.nData = 0;
3312 }
3313 
3314 /*
3315 ** xSetOutputs callback used by detail=col when there is a column filter
3316 ** and there are 100 or more columns. Also called as a fallback from
3317 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
3318 */
fts5IterSetOutputs_Col(Fts5Iter * pIter,Fts5SegIter * pSeg)3319 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
3320   fts5BufferZero(&pIter->poslist);
3321   fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
3322   pIter->base.iRowid = pSeg->iRowid;
3323   pIter->base.pData = pIter->poslist.p;
3324   pIter->base.nData = pIter->poslist.n;
3325 }
3326 
3327 /*
3328 ** xSetOutputs callback used when:
3329 **
3330 **   * detail=col,
3331 **   * there is a column filter, and
3332 **   * the table contains 100 or fewer columns.
3333 **
3334 ** The last point is to ensure all column numbers are stored as
3335 ** single-byte varints.
3336 */
fts5IterSetOutputs_Col100(Fts5Iter * pIter,Fts5SegIter * pSeg)3337 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
3338 
3339   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
3340   assert( pIter->pColset );
3341 
3342   if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
3343     fts5IterSetOutputs_Col(pIter, pSeg);
3344   }else{
3345     u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
3346     u8 *pEnd = (u8*)&a[pSeg->nPos];
3347     int iPrev = 0;
3348     int *aiCol = pIter->pColset->aiCol;
3349     int *aiColEnd = &aiCol[pIter->pColset->nCol];
3350 
3351     u8 *aOut = pIter->poslist.p;
3352     int iPrevOut = 0;
3353 
3354     pIter->base.iRowid = pSeg->iRowid;
3355 
3356     while( a<pEnd ){
3357       iPrev += (int)a++[0] - 2;
3358       while( *aiCol<iPrev ){
3359         aiCol++;
3360         if( aiCol==aiColEnd ) goto setoutputs_col_out;
3361       }
3362       if( *aiCol==iPrev ){
3363         *aOut++ = (u8)((iPrev - iPrevOut) + 2);
3364         iPrevOut = iPrev;
3365       }
3366     }
3367 
3368 setoutputs_col_out:
3369     pIter->base.pData = pIter->poslist.p;
3370     pIter->base.nData = aOut - pIter->poslist.p;
3371   }
3372 }
3373 
3374 /*
3375 ** xSetOutputs callback used by detail=full when there is a column filter.
3376 */
fts5IterSetOutputs_Full(Fts5Iter * pIter,Fts5SegIter * pSeg)3377 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
3378   Fts5Colset *pColset = pIter->pColset;
3379   pIter->base.iRowid = pSeg->iRowid;
3380 
3381   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
3382   assert( pColset );
3383 
3384   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
3385     /* All data is stored on the current page. Populate the output
3386     ** variables to point into the body of the page object. */
3387     const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
3388     int *pRc = &pIter->pIndex->rc;
3389     fts5BufferZero(&pIter->poslist);
3390     fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
3391   }else{
3392     /* The data is distributed over two or more pages. Copy it into the
3393     ** Fts5Iter.poslist buffer and then set the output pointer to point
3394     ** to this buffer.  */
3395     fts5BufferZero(&pIter->poslist);
3396     fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
3397     pIter->base.pData = pIter->poslist.p;
3398     pIter->base.nData = pIter->poslist.n;
3399   }
3400 }
3401 
/*
** Install the xSetOutputs callback appropriate for iterator pIter, based
** on the detail mode of the table and on pIter->pColset:
**
**   detail=none                      -> fts5IterSetOutputs_None
**   no colset                        -> fts5IterSetOutputs_Nocolset
**   colset with nCol==0              -> fts5IterSetOutputs_ZeroColset
**   detail=full                      -> fts5IterSetOutputs_Full
**   detail=col, 100 or fewer columns -> fts5IterSetOutputs_Col100
**   detail=col, more than 100        -> fts5IterSetOutputs_Col
**
** For the Col100 case sqlite3Fts5BufferSize() is also called to size the
** pIter->poslist buffer for pConfig->nCol bytes, which may set *pRc.
**
** This function is a no-op if *pRc is not SQLITE_OK when it is called.
** pIter may only be NULL in that case.
*/
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  Fts5Config *pConfig;
  Fts5Colset *pColset;
  void (*xOut)(Fts5Iter*, Fts5SegIter*);

  assert( pIter!=0 || (*pRc)!=SQLITE_OK );
  if( *pRc!=SQLITE_OK ) return;

  pConfig = pIter->pIndex->pConfig;
  pColset = pIter->pColset;

  if( pConfig->eDetail==FTS5_DETAIL_NONE ){
    xOut = fts5IterSetOutputs_None;
  }else if( pColset==0 ){
    xOut = fts5IterSetOutputs_Nocolset;
  }else if( pColset->nCol==0 ){
    xOut = fts5IterSetOutputs_ZeroColset;
  }else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
    xOut = fts5IterSetOutputs_Full;
  }else{
    assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
    if( pConfig->nCol>100 ){
      xOut = fts5IterSetOutputs_Col;
    }else{
      xOut = fts5IterSetOutputs_Col100;
      sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
    }
  }
  pIter->xSetOutputs = xOut;
}
3433 
3434 
3435 /*
3436 ** Allocate a new Fts5Iter object.
3437 **
3438 ** The new object will be used to iterate through data in structure pStruct.
3439 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
3440 ** is zero or greater, data from the first nSegment segments on level iLevel
3441 ** is merged.
3442 **
3443 ** The iterator initially points to the first term/rowid entry in the
3444 ** iterated data.
3445 */
fts5MultiIterNew(Fts5Index * p,Fts5Structure * pStruct,int flags,Fts5Colset * pColset,const u8 * pTerm,int nTerm,int iLevel,int nSegment,Fts5Iter ** ppOut)3446 static void fts5MultiIterNew(
3447   Fts5Index *p,                   /* FTS5 backend to iterate within */
3448   Fts5Structure *pStruct,         /* Structure of specific index */
3449   int flags,                      /* FTS5INDEX_QUERY_XXX flags */
3450   Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
3451   const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
3452   int iLevel,                     /* Level to iterate (-1 for all) */
3453   int nSegment,                   /* Number of segments to merge (iLevel>=0) */
3454   Fts5Iter **ppOut                /* New object */
3455 ){
3456   int nSeg = 0;                   /* Number of segment-iters in use */
3457   int iIter = 0;                  /* */
3458   int iSeg;                       /* Used to iterate through segments */
3459   Fts5StructureLevel *pLvl;
3460   Fts5Iter *pNew;
3461 
3462   assert( (pTerm==0 && nTerm==0) || iLevel<0 );
3463 
3464   /* Allocate space for the new multi-seg-iterator. */
3465   if( p->rc==SQLITE_OK ){
3466     if( iLevel<0 ){
3467       assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
3468       nSeg = pStruct->nSegment;
3469       nSeg += (p->pHash ? 1 : 0);
3470     }else{
3471       nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
3472     }
3473   }
3474   *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
3475   if( pNew==0 ){
3476     assert( p->rc!=SQLITE_OK );
3477     goto fts5MultiIterNew_post_check;
3478   }
3479   pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
3480   pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
3481   pNew->pColset = pColset;
3482   if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
3483     fts5IterSetOutputCb(&p->rc, pNew);
3484   }
3485 
3486   /* Initialize each of the component segment iterators. */
3487   if( p->rc==SQLITE_OK ){
3488     if( iLevel<0 ){
3489       Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
3490       if( p->pHash ){
3491         /* Add a segment iterator for the current contents of the hash table. */
3492         Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3493         fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
3494       }
3495       for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
3496         for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
3497           Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
3498           Fts5SegIter *pIter = &pNew->aSeg[iIter++];
3499           if( pTerm==0 ){
3500             fts5SegIterInit(p, pSeg, pIter);
3501           }else{
3502             fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
3503           }
3504         }
3505       }
3506     }else{
3507       pLvl = &pStruct->aLevel[iLevel];
3508       for(iSeg=nSeg-1; iSeg>=0; iSeg--){
3509         fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
3510       }
3511     }
3512     assert( iIter==nSeg );
3513   }
3514 
3515   /* If the above was successful, each component iterators now points
3516   ** to the first entry in its segment. In this case initialize the
3517   ** aFirst[] array. Or, if an error has occurred, free the iterator
3518   ** object and set the output variable to NULL.  */
3519   if( p->rc==SQLITE_OK ){
3520     for(iIter=pNew->nSeg-1; iIter>0; iIter--){
3521       int iEq;
3522       if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
3523         Fts5SegIter *pSeg = &pNew->aSeg[iEq];
3524         if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
3525         fts5MultiIterAdvanced(p, pNew, iEq, iIter);
3526       }
3527     }
3528     fts5MultiIterSetEof(pNew);
3529     fts5AssertMultiIterSetup(p, pNew);
3530 
3531     if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
3532       fts5MultiIterNext(p, pNew, 0, 0);
3533     }else if( pNew->base.bEof==0 ){
3534       Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
3535       pNew->xSetOutputs(pNew, pSeg);
3536     }
3537 
3538   }else{
3539     fts5MultiIterFree(pNew);
3540     *ppOut = 0;
3541   }
3542 
3543 fts5MultiIterNew_post_check:
3544   assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
3545   return;
3546 }
3547 
3548 /*
3549 ** Create an Fts5Iter that iterates through the doclist provided
3550 ** as the second argument.
3551 */
fts5MultiIterNew2(Fts5Index * p,Fts5Data * pData,int bDesc,Fts5Iter ** ppOut)3552 static void fts5MultiIterNew2(
3553   Fts5Index *p,                   /* FTS5 backend to iterate within */
3554   Fts5Data *pData,                /* Doclist to iterate through */
3555   int bDesc,                      /* True for descending rowid order */
3556   Fts5Iter **ppOut                /* New object */
3557 ){
3558   Fts5Iter *pNew;
3559   pNew = fts5MultiIterAlloc(p, 2);
3560   if( pNew ){
3561     Fts5SegIter *pIter = &pNew->aSeg[1];
3562 
3563     pIter->flags = FTS5_SEGITER_ONETERM;
3564     if( pData->szLeaf>0 ){
3565       pIter->pLeaf = pData;
3566       pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
3567       pIter->iEndofDoclist = pData->nn;
3568       pNew->aFirst[1].iFirst = 1;
3569       if( bDesc ){
3570         pNew->bRev = 1;
3571         pIter->flags |= FTS5_SEGITER_REVERSE;
3572         fts5SegIterReverseInitPage(p, pIter);
3573       }else{
3574         fts5SegIterLoadNPos(p, pIter);
3575       }
3576       pData = 0;
3577     }else{
3578       pNew->base.bEof = 1;
3579     }
3580     fts5SegIterSetNext(p, pIter);
3581 
3582     *ppOut = pNew;
3583   }
3584 
3585   fts5DataRelease(pData);
3586 }
3587 
3588 /*
3589 ** Return true if the iterator is at EOF or if an error has occurred.
3590 ** False otherwise.
3591 */
fts5MultiIterEof(Fts5Index * p,Fts5Iter * pIter)3592 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
3593   assert( pIter!=0 || p->rc!=SQLITE_OK );
3594   assert( p->rc!=SQLITE_OK
3595       || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
3596   );
3597   return (p->rc || pIter->base.bEof);
3598 }
3599 
3600 /*
3601 ** Return the rowid of the entry that the iterator currently points
3602 ** to. If the iterator points to EOF when this function is called the
3603 ** results are undefined.
3604 */
fts5MultiIterRowid(Fts5Iter * pIter)3605 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
3606   assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
3607   return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
3608 }
3609 
3610 /*
3611 ** Move the iterator to the next entry at or following iMatch.
3612 */
fts5MultiIterNextFrom(Fts5Index * p,Fts5Iter * pIter,i64 iMatch)3613 static void fts5MultiIterNextFrom(
3614   Fts5Index *p,
3615   Fts5Iter *pIter,
3616   i64 iMatch
3617 ){
3618   while( 1 ){
3619     i64 iRowid;
3620     fts5MultiIterNext(p, pIter, 1, iMatch);
3621     if( fts5MultiIterEof(p, pIter) ) break;
3622     iRowid = fts5MultiIterRowid(pIter);
3623     if( pIter->bRev==0 && iRowid>=iMatch ) break;
3624     if( pIter->bRev!=0 && iRowid<=iMatch ) break;
3625   }
3626 }
3627 
3628 /*
3629 ** Return a pointer to a buffer containing the term associated with the
3630 ** entry that the iterator currently points to.
3631 */
fts5MultiIterTerm(Fts5Iter * pIter,int * pn)3632 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
3633   Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
3634   *pn = p->term.n;
3635   return p->term.p;
3636 }
3637 
3638 /*
3639 ** Allocate a new segment-id for the structure pStruct. The new segment
3640 ** id must be between 1 and 65335 inclusive, and must not be used by
3641 ** any currently existing segment. If a free segment id cannot be found,
3642 ** SQLITE_FULL is returned.
3643 **
3644 ** If an error has already occurred, this function is a no-op. 0 is
3645 ** returned in this case.
3646 */
fts5AllocateSegid(Fts5Index * p,Fts5Structure * pStruct)3647 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
3648   int iSegid = 0;
3649 
3650   if( p->rc==SQLITE_OK ){
3651     if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
3652       p->rc = SQLITE_FULL;
3653     }else{
3654       /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
3655       ** array is 63 elements, or 252 bytes, in size.  */
3656       u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
3657       int iLvl, iSeg;
3658       int i;
3659       u32 mask;
3660       memset(aUsed, 0, sizeof(aUsed));
3661       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3662         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3663           int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
3664           if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
3665             aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
3666           }
3667         }
3668       }
3669 
3670       for(i=0; aUsed[i]==0xFFFFFFFF; i++);
3671       mask = aUsed[i];
3672       for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
3673       iSegid += 1 + i*32;
3674 
3675 #ifdef SQLITE_DEBUG
3676       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
3677         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
3678           assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
3679         }
3680       }
3681       assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
3682 
3683       {
3684         sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
3685         if( p->rc==SQLITE_OK ){
3686           u8 aBlob[2] = {0xff, 0xff};
3687           sqlite3_bind_int(pIdxSelect, 1, iSegid);
3688           sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
3689           assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
3690           p->rc = sqlite3_reset(pIdxSelect);
3691           sqlite3_bind_null(pIdxSelect, 2);
3692         }
3693       }
3694 #endif
3695     }
3696   }
3697 
3698   return iSegid;
3699 }
3700 
3701 /*
3702 ** Discard all data currently cached in the hash-tables.
3703 */
fts5IndexDiscardData(Fts5Index * p)3704 static void fts5IndexDiscardData(Fts5Index *p){
3705   assert( p->pHash || p->nPendingData==0 );
3706   if( p->pHash ){
3707     sqlite3Fts5HashClear(p->pHash);
3708     p->nPendingData = 0;
3709   }
3710 }
3711 
3712 /*
3713 ** Return the size of the prefix, in bytes, that buffer
3714 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
3715 **
3716 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
3717 ** than buffer (pOld/nOld).
3718 */
fts5PrefixCompress(int nOld,const u8 * pOld,const u8 * pNew)3719 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
3720   int i;
3721   for(i=0; i<nOld; i++){
3722     if( pOld[i]!=pNew[i] ) break;
3723   }
3724   return i;
3725 }
3726 
/*
** Reset the doclist-index accumulated in pWriter->aDlidx[]. Levels are
** visited from 0 upwards, stopping at the first one with an empty buffer.
** For each level visited the buffer is zeroed and bPrevValid is cleared.
** If bFlush is true, the buffer is first written out under the rowid
** FTS5_DLIDX_ROWID(iSegid, level, pgno).
*/
static void fts5WriteDlidxClear(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int bFlush                      /* If true, write dlidx to disk */
){
  int iLvl;
  assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
  for(iLvl=0;
      iLvl<pWriter->nDlidx && pWriter->aDlidx[iLvl].buf.n!=0;
      iLvl++
  ){
    Fts5DlidxWriter *pLvl = &pWriter->aDlidx[iLvl];
    if( bFlush ){
      assert( pLvl->pgno!=0 );
      fts5DataWrite(p,
          FTS5_DLIDX_ROWID(pWriter->iSegid, iLvl, pLvl->pgno),
          pLvl->buf.p, pLvl->buf.n
      );
    }
    sqlite3Fts5BufferZero(&pLvl->buf);
    pLvl->bPrevValid = 0;
  }
}
3748 
3749 /*
3750 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
3751 ** Any new array elements are zeroed before returning.
3752 */
fts5WriteDlidxGrow(Fts5Index * p,Fts5SegWriter * pWriter,int nLvl)3753 static int fts5WriteDlidxGrow(
3754   Fts5Index *p,
3755   Fts5SegWriter *pWriter,
3756   int nLvl
3757 ){
3758   if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
3759     Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
3760         pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
3761     );
3762     if( aDlidx==0 ){
3763       p->rc = SQLITE_NOMEM;
3764     }else{
3765       size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
3766       memset(&aDlidx[pWriter->nDlidx], 0, nByte);
3767       pWriter->aDlidx = aDlidx;
3768       pWriter->nDlidx = nLvl;
3769     }
3770   }
3771   return p->rc;
3772 }
3773 
3774 /*
3775 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
3776 ** enough, flush it to disk and return 1. Otherwise discard it and return
3777 ** zero.
3778 */
fts5WriteFlushDlidx(Fts5Index * p,Fts5SegWriter * pWriter)3779 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
3780   int bFlag = 0;
3781 
3782   /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
3783   ** to the database, also write the doclist-index to disk.  */
3784   if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
3785     bFlag = 1;
3786   }
3787   fts5WriteDlidxClear(p, pWriter, bFlag);
3788   pWriter->nEmpty = 0;
3789   return bFlag;
3790 }
3791 
3792 /*
3793 ** This function is called whenever processing of the doclist for the
3794 ** last term on leaf page (pWriter->iBtPage) is completed.
3795 **
3796 ** The doclist-index for that term is currently stored in-memory within the
3797 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
3798 ** writes it out to disk. Or, if it is too small to bother with, discards
3799 ** it.
3800 **
3801 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
3802 */
fts5WriteFlushBtree(Fts5Index * p,Fts5SegWriter * pWriter)3803 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
3804   int bFlag;
3805 
3806   assert( pWriter->iBtPage || pWriter->nEmpty==0 );
3807   if( pWriter->iBtPage==0 ) return;
3808   bFlag = fts5WriteFlushDlidx(p, pWriter);
3809 
3810   if( p->rc==SQLITE_OK ){
3811     const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
3812     /* The following was already done in fts5WriteInit(): */
3813     /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
3814     sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
3815     sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
3816     sqlite3_step(p->pIdxWriter);
3817     p->rc = sqlite3_reset(p->pIdxWriter);
3818     sqlite3_bind_null(p->pIdxWriter, 2);
3819   }
3820   pWriter->iBtPage = 0;
3821 }
3822 
3823 /*
3824 ** This is called once for each leaf page except the first that contains
3825 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
3826 ** is larger than all terms written to earlier leaves, and equal to or
3827 ** smaller than the first term on the new leaf.
3828 **
3829 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
3830 ** has already occurred when this function is called, it is a no-op.
3831 */
fts5WriteBtreeTerm(Fts5Index * p,Fts5SegWriter * pWriter,int nTerm,const u8 * pTerm)3832 static void fts5WriteBtreeTerm(
3833   Fts5Index *p,                   /* FTS5 backend object */
3834   Fts5SegWriter *pWriter,         /* Writer object */
3835   int nTerm, const u8 *pTerm      /* First term on new page */
3836 ){
3837   fts5WriteFlushBtree(p, pWriter);
3838   if( p->rc==SQLITE_OK ){
3839     fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
3840     pWriter->iBtPage = pWriter->writer.pgno;
3841   }
3842 }
3843 
3844 /*
3845 ** This function is called when flushing a leaf page that contains no
3846 ** terms at all to disk.
3847 */
fts5WriteBtreeNoTerm(Fts5Index * p,Fts5SegWriter * pWriter)3848 static void fts5WriteBtreeNoTerm(
3849   Fts5Index *p,                   /* FTS5 backend object */
3850   Fts5SegWriter *pWriter          /* Writer object */
3851 ){
3852   /* If there were no rowids on the leaf page either and the doclist-index
3853   ** has already been started, append an 0x00 byte to it.  */
3854   if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
3855     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
3856     assert( pDlidx->bPrevValid );
3857     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
3858   }
3859 
3860   /* Increment the "number of sequential leaves without a term" counter. */
3861   pWriter->nEmpty++;
3862 }
3863 
fts5DlidxExtractFirstRowid(Fts5Buffer * pBuf)3864 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
3865   i64 iRowid;
3866   int iOff;
3867 
3868   iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
3869   fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
3870   return iRowid;
3871 }
3872 
3873 /*
3874 ** Rowid iRowid has just been appended to the current leaf page. It is the
3875 ** first on the page. This function appends an appropriate entry to the current
3876 ** doclist-index.
3877 */
fts5WriteDlidxAppend(Fts5Index * p,Fts5SegWriter * pWriter,i64 iRowid)3878 static void fts5WriteDlidxAppend(
3879   Fts5Index *p,
3880   Fts5SegWriter *pWriter,
3881   i64 iRowid
3882 ){
3883   int i;
3884   int bDone = 0;
3885 
3886   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
3887     i64 iVal;
3888     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
3889 
3890     if( pDlidx->buf.n>=p->pConfig->pgsz ){
3891       /* The current doclist-index page is full. Write it to disk and push
3892       ** a copy of iRowid (which will become the first rowid on the next
3893       ** doclist-index leaf page) up into the next level of the b-tree
3894       ** hierarchy. If the node being flushed is currently the root node,
3895       ** also push its first rowid upwards. */
3896       pDlidx->buf.p[0] = 0x01;    /* Not the root node */
3897       fts5DataWrite(p,
3898           FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
3899           pDlidx->buf.p, pDlidx->buf.n
3900       );
3901       fts5WriteDlidxGrow(p, pWriter, i+2);
3902       pDlidx = &pWriter->aDlidx[i];
3903       if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
3904         i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
3905 
3906         /* This was the root node. Push its first rowid up to the new root. */
3907         pDlidx[1].pgno = pDlidx->pgno;
3908         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
3909         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
3910         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
3911         pDlidx[1].bPrevValid = 1;
3912         pDlidx[1].iPrev = iFirst;
3913       }
3914 
3915       sqlite3Fts5BufferZero(&pDlidx->buf);
3916       pDlidx->bPrevValid = 0;
3917       pDlidx->pgno++;
3918     }else{
3919       bDone = 1;
3920     }
3921 
3922     if( pDlidx->bPrevValid ){
3923       iVal = iRowid - pDlidx->iPrev;
3924     }else{
3925       i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
3926       assert( pDlidx->buf.n==0 );
3927       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
3928       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
3929       iVal = iRowid;
3930     }
3931 
3932     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
3933     pDlidx->bPrevValid = 1;
3934     pDlidx->iPrev = iRowid;
3935   }
3936 }
3937 
/*
** Write the leaf page currently accumulating in pWriter->writer to disk
** and prepare the writer for the next page.
**
** Before the page is written, the current size of the page buffer is
** stored as a 16-bit value at byte offset 2 of the page (the szLeaf
** field). If one or more terms were written to the page, the page-index
** is then appended to the page buffer. Otherwise,
** fts5WriteBtreeNoTerm() is invoked.
**
** Afterwards the page and page-index buffers are reset, a fresh 4-byte
** all-zero header is appended, the page number and the nLeafWritten
** counter are incremented, and the first-term/first-rowid flags are set.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 aEmptyHdr[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->writer;

  assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );

  /* Set the szLeaf header field. */
  assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);

  if( !pWriter->bFirstTermInPage ){
    /* Append the pgidx to the page buffer. */
    fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  }else{
    /* No term was written to this page. */
    assert( pPage->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }

  /* Write the page out to disk */
  fts5DataWrite(p,
      FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno),
      pPage->buf.p, pPage->buf.n
  );

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, sizeof(aEmptyHdr), aEmptyHdr);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* One more leaf has been written. The new leaf holds no terms or
  ** rowids. */
  pWriter->nLeafWritten++;
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}
3976 
3977 /*
3978 ** Append term pTerm/nTerm to the segment being written by the writer passed
3979 ** as the second argument.
3980 **
3981 ** If an error occurs, set the Fts5Index.rc error code. If an error has
3982 ** already occurred, this function is a no-op.
3983 */
fts5WriteAppendTerm(Fts5Index * p,Fts5SegWriter * pWriter,int nTerm,const u8 * pTerm)3984 static void fts5WriteAppendTerm(
3985   Fts5Index *p,
3986   Fts5SegWriter *pWriter,
3987   int nTerm, const u8 *pTerm
3988 ){
3989   int nPrefix;                    /* Bytes of prefix compression for term */
3990   Fts5PageWriter *pPage = &pWriter->writer;
3991   Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
3992   int nMin = MIN(pPage->term.n, nTerm);
3993 
3994   assert( p->rc==SQLITE_OK );
3995   assert( pPage->buf.n>=4 );
3996   assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
3997 
3998   /* If the current leaf page is full, flush it to disk. */
3999   if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
4000     if( pPage->buf.n>4 ){
4001       fts5WriteFlushLeaf(p, pWriter);
4002       if( p->rc!=SQLITE_OK ) return;
4003     }
4004     fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
4005   }
4006 
4007   /* TODO1: Updating pgidx here. */
4008   pPgidx->n += sqlite3Fts5PutVarint(
4009       &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
4010   );
4011   pPage->iPrevPgidx = pPage->buf.n;
4012 #if 0
4013   fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
4014   pPgidx->n += 2;
4015 #endif
4016 
4017   if( pWriter->bFirstTermInPage ){
4018     nPrefix = 0;
4019     if( pPage->pgno!=1 ){
4020       /* This is the first term on a leaf that is not the leftmost leaf in
4021       ** the segment b-tree. In this case it is necessary to add a term to
4022       ** the b-tree hierarchy that is (a) larger than the largest term
4023       ** already written to the segment and (b) smaller than or equal to
4024       ** this term. In other words, a prefix of (pTerm/nTerm) that is one
4025       ** byte longer than the longest prefix (pTerm/nTerm) shares with the
4026       ** previous term.
4027       **
4028       ** Usually, the previous term is available in pPage->term. The exception
4029       ** is if this is the first term written in an incremental-merge step.
4030       ** In this case the previous term is not available, so just write a
4031       ** copy of (pTerm/nTerm) into the parent node. This is slightly
4032       ** inefficient, but still correct.  */
4033       int n = nTerm;
4034       if( pPage->term.n ){
4035         n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4036       }
4037       fts5WriteBtreeTerm(p, pWriter, n, pTerm);
4038       if( p->rc!=SQLITE_OK ) return;
4039       pPage = &pWriter->writer;
4040     }
4041   }else{
4042     nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
4043     fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
4044   }
4045 
4046   /* Append the number of bytes of new data, then the term data itself
4047   ** to the page. */
4048   fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
4049   fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
4050 
4051   /* Update the Fts5PageWriter.term field. */
4052   fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
4053   pWriter->bFirstTermInPage = 0;
4054 
4055   pWriter->bFirstRowidInPage = 0;
4056   pWriter->bFirstRowidInDoclist = 1;
4057 
4058   assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
4059   pWriter->aDlidx[0].pgno = pPage->pgno;
4060 }
4061 
4062 /*
4063 ** Append a rowid and position-list size field to the writers output.
4064 */
fts5WriteAppendRowid(Fts5Index * p,Fts5SegWriter * pWriter,i64 iRowid)4065 static void fts5WriteAppendRowid(
4066   Fts5Index *p,
4067   Fts5SegWriter *pWriter,
4068   i64 iRowid
4069 ){
4070   if( p->rc==SQLITE_OK ){
4071     Fts5PageWriter *pPage = &pWriter->writer;
4072 
4073     if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
4074       fts5WriteFlushLeaf(p, pWriter);
4075     }
4076 
4077     /* If this is to be the first rowid written to the page, set the
4078     ** rowid-pointer in the page-header. Also append a value to the dlidx
4079     ** buffer, in case a doclist-index is required.  */
4080     if( pWriter->bFirstRowidInPage ){
4081       fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
4082       fts5WriteDlidxAppend(p, pWriter, iRowid);
4083     }
4084 
4085     /* Write the rowid. */
4086     if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
4087       fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
4088     }else{
4089       assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
4090       fts5BufferAppendVarint(&p->rc, &pPage->buf,
4091           (u64)iRowid - (u64)pWriter->iPrevRowid
4092       );
4093     }
4094     pWriter->iPrevRowid = iRowid;
4095     pWriter->bFirstRowidInDoclist = 0;
4096     pWriter->bFirstRowidInPage = 0;
4097   }
4098 }
4099 
/*
** Append the nData bytes of data in buffer aData to the output of the
** writer, splitting it across leaf pages as required.
**
** While the remaining data would bring the current page up to or past
** the configured page size, whole varints are copied onto the page until
** it has reached that size, and the page is then flushed. The data is
** only ever split on a varint boundary. Whatever remains after the last
** flush is appended to the new current page.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  const u8 *aData,
  int nData
){
  Fts5PageWriter *pPage = &pWriter->writer;
  const u8 *aRem = aData;         /* Data not yet written */
  int nRem = nData;               /* Size of aRem[] in bytes */

  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK
     && (pPage->buf.n + pPage->pgidx.n + nRem)>=p->pConfig->pgsz
  ){
    /* Bytes still required to bring the current page up to pgsz */
    int nSpace = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
    int nCopy;

    for(nCopy=0; nCopy<nSpace; ){
      i64 dummy;
      nCopy += fts5GetVarint(&aRem[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, aRem);
    aRem += nCopy;
    nRem -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
  }

  if( nRem>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nRem, aRem);
  }
}
4129 
4130 /*
4131 ** Flush any data cached by the writer object to the database. Free any
4132 ** allocations associated with the writer.
4133 */
fts5WriteFinish(Fts5Index * p,Fts5SegWriter * pWriter,int * pnLeaf)4134 static void fts5WriteFinish(
4135   Fts5Index *p,
4136   Fts5SegWriter *pWriter,         /* Writer object */
4137   int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
4138 ){
4139   int i;
4140   Fts5PageWriter *pLeaf = &pWriter->writer;
4141   if( p->rc==SQLITE_OK ){
4142     assert( pLeaf->pgno>=1 );
4143     if( pLeaf->buf.n>4 ){
4144       fts5WriteFlushLeaf(p, pWriter);
4145     }
4146     *pnLeaf = pLeaf->pgno-1;
4147     if( pLeaf->pgno>1 ){
4148       fts5WriteFlushBtree(p, pWriter);
4149     }
4150   }
4151   fts5BufferFree(&pLeaf->term);
4152   fts5BufferFree(&pLeaf->buf);
4153   fts5BufferFree(&pLeaf->pgidx);
4154   fts5BufferFree(&pWriter->btterm);
4155 
4156   for(i=0; i<pWriter->nDlidx; i++){
4157     sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
4158   }
4159   sqlite3_free(pWriter->aDlidx);
4160 }
4161 
/*
** Initialize writer object pWriter so that it may be used to write a new
** segment with segment id iSegid. Any existing contents of *pWriter are
** overwritten without being freed.
**
** The p->pIdxWriter statement, which inserts rows into the %_idx table,
** is prepared if it does not already exist. If an error occurs, or has
** already occurred, it is left in p->rc.
*/
static void fts5WriteInit(
  Fts5Index *p,
  Fts5SegWriter *pWriter,
  int iSegid
){
  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(*pWriter));
  pWriter->iSegid = iSegid;
  pWriter->writer.pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  /* Allocate the first level of the doclist-index array. */
  fts5WriteDlidxGrow(p, pWriter, 1);

  assert( pWriter->writer.buf.n==0 );
  assert( pWriter->writer.pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);

  if( p->pIdxWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
          "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
          pConfig->zDb, pConfig->zName
    ));
  }

  if( p->rc==SQLITE_OK ){
    /* Initialize the 4-byte leaf-page header to 0x00. */
    memset(pWriter->writer.buf.p, 0, 4);
    pWriter->writer.buf.n = 4;

    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}
4203 
4204 /*
4205 ** Iterator pIter was used to iterate through the input segments of on an
4206 ** incremental merge operation. This function is called if the incremental
4207 ** merge step has finished but the input has not been completely exhausted.
4208 */
/*
** Called by fts5IndexMergeLevel() when a merge is suspended before all
** of its input has been consumed. For each segment-iterator in pIter:
**
**   * if it has no associated segment, do nothing;
**
**   * if it has no current leaf (all keys have been transferred to the
**     output), mark the segment as empty by setting both its first and
**     last page numbers to 0;
**
**   * otherwise, rewrite the leaf that holds the iterator's current term
**     so that it begins with that term, make that leaf the first page of
**     the segment, and delete the pages that precede it.
**
** If an error occurs it is stored in Fts5Index.rc, which also causes the
** loop to stop.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  Fts5Buffer page;                /* New image of a segment's first leaf */
  int iIter;

  memset(&page, 0, sizeof(Fts5Buffer));
  for(iIter=0; iIter<pIter->nSeg && p->rc==SQLITE_OK; iIter++){
    Fts5SegIter *pSegIter = &pIter->aSeg[iIter];
    Fts5Data *pPage;              /* Existing contents of the term leaf */
    i64 iRowid;                   /* Rowid of the term leaf in %_data */
    int iSegid;                   /* Segment id of this input segment */
    int iFrom;                    /* Offset in pPage of data to keep */

    if( pSegIter->pSeg==0 ) continue;

    if( pSegIter->pLeaf==0 ){
      /* Nothing left to read from this segment. Mark it as empty. */
      pSegIter->pSeg->pgnoLast = 0;
      pSegIter->pSeg->pgnoFirst = 0;
      continue;
    }

    iFrom = pSegIter->iTermLeafOffset;
    iSegid = pSegIter->pSeg->iSegid;
    iRowid = FTS5_SEGMENT_ROWID(iSegid, pSegIter->iTermLeafPgno);
    pPage = fts5LeafRead(p, iRowid);
    if( pPage==0 ) continue;

    if( iFrom>pPage->szLeaf ){
      /* This can happen if one page has been assigned to more than one
      ** segment, in which case an earlier iteration of this loop may
      ** have overwritten the leaf being trimmed now. */
      p->rc = FTS5_CORRUPT;
    }else{
      u8 aZeroHdr[4] = {0x00, 0x00, 0x00, 0x00};

      /* New page: 4 byte header, the current term, then whatever followed
      ** offset iFrom on the old page. */
      fts5BufferZero(&page);
      fts5BufferGrow(&p->rc, &page, pPage->nn);
      fts5BufferAppendBlob(&p->rc, &page, sizeof(aZeroHdr), aZeroHdr);
      fts5BufferAppendVarint(&p->rc, &page, pSegIter->term.n);
      fts5BufferAppendBlob(&p->rc, &page, pSegIter->term.n, pSegIter->term.p);
      fts5BufferAppendBlob(&p->rc, &page, pPage->szLeaf-iFrom, &pPage->p[iFrom]);
      if( p->rc==SQLITE_OK ){
        /* The szLeaf field lives at byte offset 2 of the header */
        fts5PutU16(&page.p[2], (u16)page.n);
      }

      /* New page-index array. Its first entry is 4 - the offset of the
      ** term written immediately after the header. If the doclist for the
      ** current term ends on this same leaf, the remainder of the old
      ** page-index is carried over as well. */
      fts5BufferAppendVarint(&p->rc, &page, 4);
      if( pSegIter->iLeafPgno==pSegIter->iTermLeafPgno
       && pSegIter->iEndofDoclist<pPage->szLeaf
       && pSegIter->iPgidxOff<=pPage->nn
      ){
        int nDiff = pPage->szLeaf - pSegIter->iEndofDoclist;
        fts5BufferAppendVarint(&p->rc, &page, page.n - 1 - nDiff - 4);
        fts5BufferAppendBlob(&p->rc, &page,
            pPage->nn - pSegIter->iPgidxOff, &pPage->p[pSegIter->iPgidxOff]
        );
      }

      /* The term leaf becomes the first page of the segment. Remove the
      ** pages from 1 up to it, then store the rewritten leaf. */
      pSegIter->pSeg->pgnoFirst = pSegIter->iTermLeafPgno;
      fts5DataDelete(p, FTS5_SEGMENT_ROWID(iSegid, 1), iRowid);
      fts5DataWrite(p, iRowid, page.p, page.n);
    }
    fts5DataRelease(pPage);
  }
  fts5BufferFree(&page);
}
4274 
/*
** Chunk callback passed to fts5ChunkIterate() by fts5IndexMergeLevel().
** Context pointer pCtx is the Fts5SegWriter for the output segment. The
** nChunk bytes at pChunk are appended to it as position-list data.
*/
static void fts5MergeChunkCallback(
  Fts5Index *p,                   /* FTS5 backend object */
  void *pCtx,                     /* Really an (Fts5SegWriter*) */
  const u8 *pChunk, int nChunk    /* Chunk of position-list data */
){
  fts5WriteAppendPoslistData(p, (Fts5SegWriter*)pCtx, pChunk, nChunk);
}
4283 
4284 /*
** Merge segments on level iLvl of structure *ppStruct into a single
** output segment on level (iLvl+1).
**
** If a merge from level iLvl is already underway (pLvl->nMerge is
** non-zero), it is resumed: the inputs are the first nMerge segments on
** the level and the output is the last segment on level (iLvl+1).
** Otherwise a new output segment is allocated (adding a level to the
** structure if required, in which case *ppStruct is updated) and all
** segments on level iLvl are used as inputs.
**
** If pnRem is not NULL, the merge is suspended at the first term boundary
** reached after more than (*pnRem) leaf pages have been written, and
** (*pnRem) is decremented by the number of leaves written before
** returning. If pnRem is NULL the merge always runs to completion.
**
** When all input has been consumed the input segments are removed from
** both the %_data table and the structure. Otherwise the inputs are
** trimmed with fts5TrimSegments() and pLvl->nMerge is set so that the
** merge can be resumed later.
**
** Errors are reported via Fts5Index.rc.
*/
fts5IndexMergeLevel(Fts5Index * p,Fts5Structure ** ppStruct,int iLvl,int * pnRem)4287 static void fts5IndexMergeLevel(
4288   Fts5Index *p,                   /* FTS5 backend object */
4289   Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
4290   int iLvl,                       /* Level to read input from */
4291   int *pnRem                      /* Write up to this many output leaves */
4292 ){
4293   Fts5Structure *pStruct = *ppStruct;
4294   Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4295   Fts5StructureLevel *pLvlOut;
4296   Fts5Iter *pIter = 0;       /* Iterator to read input data */
4297   int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
4298   int nInput;                     /* Number of input segments */
4299   Fts5SegWriter writer;           /* Writer object */
4300   Fts5StructureSegment *pSeg;     /* Output segment */
4301   Fts5Buffer term;
4302   int bOldest;                    /* True if the output segment is the oldest */
4303   int eDetail = p->pConfig->eDetail;
4304   const int flags = FTS5INDEX_QUERY_NOOUTPUT;
4305   int bTermWritten = 0;           /* True if current term already output */
4306 
4307   assert( iLvl<pStruct->nLevel );
4308   assert( pLvl->nMerge<=pLvl->nSeg );
4309 
4310   memset(&writer, 0, sizeof(Fts5SegWriter));
4311   memset(&term, 0, sizeof(Fts5Buffer));
4312   if( pLvl->nMerge ){
    /* Resume an existing merge. The output segment is the last segment on
    ** the next level; writing continues on the page following its current
    ** last page. */
4313     pLvlOut = &pStruct->aLevel[iLvl+1];
4314     assert( pLvlOut->nSeg>0 );
4315     nInput = pLvl->nMerge;
4316     pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
4317 
4318     fts5WriteInit(p, &writer, pSeg->iSegid);
4319     writer.writer.pgno = pSeg->pgnoLast+1;
4320     writer.iBtPage = 0;
4321   }else{
    /* Begin a new merge into a freshly allocated output segment. */
4322     int iSegid = fts5AllocateSegid(p, pStruct);
4323 
4324     /* Extend the Fts5Structure object as required to ensure the output
4325     ** segment exists. */
4326     if( iLvl==pStruct->nLevel-1 ){
4327       fts5StructureAddLevel(&p->rc, ppStruct);
4328       pStruct = *ppStruct;
4329     }
4330     fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
4331     if( p->rc ) return;
    /* pStruct may have been replaced above, so recompute both level
    ** pointers from it. */
4332     pLvl = &pStruct->aLevel[iLvl];
4333     pLvlOut = &pStruct->aLevel[iLvl+1];
4334 
4335     fts5WriteInit(p, &writer, iSegid);
4336 
4337     /* Add the new segment to the output level */
4338     pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
4339     pLvlOut->nSeg++;
4340     pSeg->pgnoFirst = 1;
4341     pSeg->iSegid = iSegid;
4342     pStruct->nSegment++;
4343 
4344     /* Read input from all segments in the input level */
4345     nInput = pLvl->nSeg;
4346   }
  /* The output is the oldest segment if it is the only segment on the
  ** output level and the output level is the last level in the structure. */
4347   bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);
4348 
4349   assert( iLvl>=0 );
4350   for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
4351       fts5MultiIterEof(p, pIter)==0;
4352       fts5MultiIterNext(p, pIter, 0, 0)
4353   ){
4354     Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
4355     int nPos;                     /* position-list size field value */
4356     int nTerm;
4357     const u8 *pTerm;
4358 
4359     pTerm = fts5MultiIterTerm(pIter, &nTerm);
4360     if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
      /* The iterator has moved on to a new term. This is the only point
      ** at which the merge may be suspended due to the pnRem budget. */
4361       if( pnRem && writer.nLeafWritten>nRem ){
4362         break;
4363       }
4364       fts5BufferSet(&p->rc, &term, nTerm, pTerm);
4365       bTermWritten =0;
4366     }
4367 
4368     /* Check for key annihilation. */
    /* An entry with an empty position list is dropped if the output is the
    ** oldest segment, or if the entry is not flagged as a delete (bDel). */
4369     if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;
4370 
4371     if( p->rc==SQLITE_OK && bTermWritten==0 ){
4372       /* This is a new term. Append a term to the output segment. */
4373       fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
4374       bTermWritten = 1;
4375     }
4376 
4377     /* Append the rowid to the output */
4378     /* WRITEPOSLISTSIZE */
4379     fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));
4380 
4381     if( eDetail==FTS5_DETAIL_NONE ){
      /* detail=none: no position data is copied. A delete is written as a
      ** single 0x00 byte, followed by a second 0x00 if nPos is non-zero. */
4382       if( pSegIter->bDel ){
4383         fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4384         if( pSegIter->nPos>0 ){
4385           fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
4386         }
4387       }
4388     }else{
4389       /* Append the position-list data to the output */
      /* The size field is (nPos*2) with the delete flag in the low bit */
4390       nPos = pSegIter->nPos*2 + pSegIter->bDel;
4391       fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
4392       fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
4393     }
4394   }
4395 
4396   /* Flush the last leaf page to disk. Set the output segment b-tree height
4397   ** and last leaf page number at the same time.  */
4398   fts5WriteFinish(p, &writer, &pSeg->pgnoLast);
4399 
4400   assert( pIter!=0 || p->rc!=SQLITE_OK );
4401   if( fts5MultiIterEof(p, pIter) ){
    /* All input was consumed, so the merge is complete. */
4402     int i;
4403 
4404     /* Remove the redundant segments from the %_data table */
4405     for(i=0; i<nInput; i++){
4406       fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
4407     }
4408 
4409     /* Remove the redundant segments from the input level */
4410     if( pLvl->nSeg!=nInput ){
4411       int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
4412       memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
4413     }
4414     pStruct->nSegment -= nInput;
4415     pLvl->nSeg -= nInput;
4416     pLvl->nMerge = 0;
    /* If nothing at all was written, discard the empty output segment */
4417     if( pSeg->pgnoLast==0 ){
4418       pLvlOut->nSeg--;
4419       pStruct->nSegment--;
4420     }
4421   }else{
    /* The merge was suspended. Trim the consumed data from the inputs and
    ** record the number of inputs so that the merge can be resumed. */
4422     assert( pSeg->pgnoLast>0 );
4423     fts5TrimSegments(p, pIter);
4424     pLvl->nMerge = nInput;
4425   }
4426 
4427   fts5MultiIterFree(pIter);
4428   fts5BufferFree(&term);
4429   if( pnRem ) *pnRem -= writer.nLeafWritten;
4430 }
4431 
4432 /*
4433 ** Do up to nPg pages of automerge work on the index.
4434 **
4435 ** Return true if any changes were actually made, or false otherwise.
4436 */
fts5IndexMerge(Fts5Index * p,Fts5Structure ** ppStruct,int nPg,int nMin)4437 static int fts5IndexMerge(
4438   Fts5Index *p,                   /* FTS5 backend object */
4439   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
4440   int nPg,                        /* Pages of work to do */
4441   int nMin                        /* Minimum number of segments to merge */
4442 ){
4443   int nRem = nPg;
4444   int bRet = 0;
4445   Fts5Structure *pStruct = *ppStruct;
4446   while( nRem>0 && p->rc==SQLITE_OK ){
4447     int iLvl;                   /* To iterate through levels */
4448     int iBestLvl = 0;           /* Level offering the most input segments */
4449     int nBest = 0;              /* Number of input segments on best level */
4450 
4451     /* Set iBestLvl to the level to read input segments from. */
4452     assert( pStruct->nLevel>0 );
4453     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
4454       Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
4455       if( pLvl->nMerge ){
4456         if( pLvl->nMerge>nBest ){
4457           iBestLvl = iLvl;
4458           nBest = pLvl->nMerge;
4459         }
4460         break;
4461       }
4462       if( pLvl->nSeg>nBest ){
4463         nBest = pLvl->nSeg;
4464         iBestLvl = iLvl;
4465       }
4466     }
4467 
4468     /* If nBest is still 0, then the index must be empty. */
4469 #ifdef SQLITE_DEBUG
4470     for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
4471       assert( pStruct->aLevel[iLvl].nSeg==0 );
4472     }
4473 #endif
4474 
4475     if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
4476       break;
4477     }
4478     bRet = 1;
4479     fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
4480     if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
4481       fts5StructurePromote(p, iBestLvl+1, pStruct);
4482     }
4483   }
4484   *ppStruct = pStruct;
4485   return bRet;
4486 }
4487 
4488 /*
4489 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
4490 ** segment. This function updates the write-counter accordingly and, if
4491 ** necessary, performs incremental merge work.
4492 **
4493 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4494 ** already occurred, this function is a no-op.
4495 */
fts5IndexAutomerge(Fts5Index * p,Fts5Structure ** ppStruct,int nLeaf)4496 static void fts5IndexAutomerge(
4497   Fts5Index *p,                   /* FTS5 backend object */
4498   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
4499   int nLeaf                       /* Number of output leaves just written */
4500 ){
4501   if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
4502     Fts5Structure *pStruct = *ppStruct;
4503     u64 nWrite;                   /* Initial value of write-counter */
4504     int nWork;                    /* Number of work-quanta to perform */
4505     int nRem;                     /* Number of leaf pages left to write */
4506 
4507     /* Update the write-counter. While doing so, set nWork. */
4508     nWrite = pStruct->nWriteCounter;
4509     nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
4510     pStruct->nWriteCounter += nLeaf;
4511     nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
4512 
4513     fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
4514   }
4515 }
4516 
/*
** Starting with level 0, completely merge each level that contains at
** least Fts5Config.nCrisisMerge segments into the next level, then
** continue with that next level. Stop at the first level with fewer
** segments than that, or when an error occurs.
**
** *ppStruct is set to the (possibly replaced) structure object.
*/
static void fts5IndexCrisismerge(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
){
  Fts5Structure *pStruct = *ppStruct;
  const int nLimit = p->pConfig->nCrisisMerge;
  int iLevel;

  assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
  for(iLevel=0;
      p->rc==SQLITE_OK && pStruct->aLevel[iLevel].nSeg>=nLimit;
      iLevel++
  ){
    /* Passing NULL as pnRem means the merge always runs to completion */
    fts5IndexMergeLevel(p, &pStruct, iLevel, 0);
    assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLevel+1) );
    fts5StructurePromote(p, iLevel+1, pStruct);
  }
  *ppStruct = pStruct;
}
4534 
/*
** Return the error code currently stored in Fts5Index.rc, and reset the
** stored value to SQLITE_OK.
*/
static int fts5IndexReturn(Fts5Index *p){
  const int rcSaved = p->rc;
  p->rc = SQLITE_OK;
  return rcSaved;
}
4540 
4541 typedef struct Fts5FlushCtx Fts5FlushCtx;
4542 struct Fts5FlushCtx {
4543   Fts5Index *pIdx;
4544   Fts5SegWriter writer;
4545 };
4546 
4547 /*
4548 ** Buffer aBuf[] contains a list of varints, all small enough to fit
4549 ** in a 32-bit integer. Return the size of the largest prefix of this
4550 ** list nMax bytes or less in size.
4551 */
fts5PoslistPrefix(const u8 * aBuf,int nMax)4552 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
4553   int ret;
4554   u32 dummy;
4555   ret = fts5GetVarint32(aBuf, dummy);
4556   if( ret<nMax ){
4557     while( 1 ){
4558       int i = fts5GetVarint32(&aBuf[ret], dummy);
4559       if( (ret + i) > nMax ) break;
4560       ret += i;
4561     }
4562   }
4563   return ret;
4564 }
4565 
4566 /*
4567 ** Flush the contents of in-memory hash table p->pHash to a new level-0
4568 ** segment on disk. Also update the corresponding structure record.
4569 **
4570 ** If an error occurs, set the Fts5Index.rc error code. If an error has
4571 ** already occurred, this function is a no-op.
4572 */
fts5FlushOneHash(Fts5Index * p)4573 static void fts5FlushOneHash(Fts5Index *p){
4574   Fts5Hash *pHash = p->pHash;
4575   Fts5Structure *pStruct;
4576   int iSegid;
4577   int pgnoLast = 0;                 /* Last leaf page number in segment */
4578 
4579   /* Obtain a reference to the index structure and allocate a new segment-id
4580   ** for the new level-0 segment.  */
4581   pStruct = fts5StructureRead(p);
4582   iSegid = fts5AllocateSegid(p, pStruct);
4583   fts5StructureInvalidate(p);
4584 
  /* NOTE(review): a zero iSegid presumably means that no segment-id could
  ** be allocated - confirm against fts5AllocateSegid(). In that case no
  ** segment is written. */
4585   if( iSegid ){
4586     const int pgsz = p->pConfig->pgsz;
4587     int eDetail = p->pConfig->eDetail;
4588     Fts5StructureSegment *pSeg;   /* New segment within pStruct */
4589     Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
4590     Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */
4591 
4592     Fts5SegWriter writer;
4593     fts5WriteInit(p, &writer, iSegid);
4594 
4595     pBuf = &writer.writer.buf;
4596     pPgidx = &writer.writer.pgidx;
4597 
4598     /* fts5WriteInit() should have initialized the buffers to (most likely)
4599     ** the maximum space required. */
4600     assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
4601     assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );
4602 
4603     /* Begin scanning through hash table entries. This loop runs once for each
4604     ** term/doclist currently stored within the hash table. */
4605     if( p->rc==SQLITE_OK ){
4606       p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
4607     }
4608     while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
4609       const char *zTerm;          /* Buffer containing term */
4610       const u8 *pDoclist;         /* Pointer to doclist for this term */
4611       int nDoclist;               /* Size of doclist in bytes */
4612 
4613       /* Write the term for this entry to disk. */
4614       sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
4615       fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
4616       if( p->rc!=SQLITE_OK ) break;
4617 
4618       assert( writer.bFirstRowidInPage==0 );
4619       if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
4620         /* The entire doclist will fit on the current leaf. */
4621         fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
4622       }else{
4623         i64 iRowid = 0;
4624         u64 iDelta = 0;
4625         int iOff = 0;
4626 
4627         /* The entire doclist will not fit on this leaf. The following
4628         ** loop iterates through the poslists that make up the current
4629         ** doclist.  */
4630         while( p->rc==SQLITE_OK && iOff<nDoclist ){
          /* Rowids are stored as deltas. Accumulate the absolute value in
          ** iRowid, as it is required for the first entry on each page. */
4631           iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
4632           iRowid += iDelta;
4633 
4634           if( writer.bFirstRowidInPage ){
            /* The first rowid on a page is written as an absolute value.
            ** Its offset is stored in the first two bytes of the page and
            ** the rowid is passed to fts5WriteDlidxAppend(). */
4635             fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
4636             pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
4637             writer.bFirstRowidInPage = 0;
4638             fts5WriteDlidxAppend(p, &writer, iRowid);
4639             if( p->rc!=SQLITE_OK ) break;
4640           }else{
4641             pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
4642           }
4643           assert( pBuf->n<=pBuf->nSpace );
4644 
4645           if( eDetail==FTS5_DETAIL_NONE ){
            /* detail=none: there is no position list. Copy up to two 0x00
            ** bytes that follow the rowid, then flush the leaf if full. */
4646             if( iOff<nDoclist && pDoclist[iOff]==0 ){
4647               pBuf->p[pBuf->n++] = 0;
4648               iOff++;
4649               if( iOff<nDoclist && pDoclist[iOff]==0 ){
4650                 pBuf->p[pBuf->n++] = 0;
4651                 iOff++;
4652               }
4653             }
4654             if( (pBuf->n + pPgidx->n)>=pgsz ){
4655               fts5WriteFlushLeaf(p, &writer);
4656             }
4657           }else{
4658             int bDummy;
4659             int nPos;
            /* nCopy is set to the size of the position-list size field
            ** plus the size of the position list itself. */
4660             int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
4661             nCopy += nPos;
4662             if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
4663               /* The entire poslist will fit on the current leaf. So copy
4664               ** it in one go. */
4665               fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
4666             }else{
4667               /* The entire poslist will not fit on this leaf. So it needs
4668               ** to be broken into sections. The only qualification being
4669               ** that each varint must be stored contiguously.  */
4670               const u8 *pPoslist = &pDoclist[iOff];
4671               int iPos = 0;
4672               while( p->rc==SQLITE_OK ){
4673                 int nSpace = pgsz - pBuf->n - pPgidx->n;
4674                 int n = 0;
4675                 if( (nCopy - iPos)<=nSpace ){
4676                   n = nCopy - iPos;
4677                 }else{
                  /* Copy as many whole varints as fit in nSpace bytes */
4678                   n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
4679                 }
4680                 assert( n>0 );
4681                 fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
4682                 iPos += n;
4683                 if( (pBuf->n + pPgidx->n)>=pgsz ){
4684                   fts5WriteFlushLeaf(p, &writer);
4685                 }
4686                 if( iPos>=nCopy ) break;
4687               }
4688             }
4689             iOff += nCopy;
4690           }
4691         }
4692       }
4693 
4694       /* TODO2: Doclist terminator written here. */
4695       /* pBuf->p[pBuf->n++] = '\0'; */
4696       assert( pBuf->n<=pBuf->nSpace );
4697       if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
4698     }
4699     sqlite3Fts5HashClear(pHash);
    /* Finish the segment. This sets pgnoLast. */
4700     fts5WriteFinish(p, &writer, &pgnoLast);
4701 
4702     /* Update the Fts5Structure. It is written back to the database by the
4703     ** fts5StructureRelease() call below.  */
4704     if( pStruct->nLevel==0 ){
4705       fts5StructureAddLevel(&p->rc, &pStruct);
4706     }
4707     fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
4708     if( p->rc==SQLITE_OK ){
4709       pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
4710       pSeg->iSegid = iSegid;
4711       pSeg->pgnoFirst = 1;
4712       pSeg->pgnoLast = pgnoLast;
4713       pStruct->nSegment++;
4714     }
4715     fts5StructurePromote(p, 0, pStruct);
4716   }
4717 
  /* pgnoLast is passed to the automerge logic as the number of leaves just
  ** written. It is still 0 if no segment was written above. */
4718   fts5IndexAutomerge(p, &pStruct, pgnoLast);
4719   fts5IndexCrisismerge(p, &pStruct);
4720   fts5StructureWrite(p, pStruct);
4721   fts5StructureRelease(pStruct);
4722 }
4723 
4724 /*
4725 ** Flush any data stored in the in-memory hash tables to the database.
4726 */
fts5IndexFlush(Fts5Index * p)4727 static void fts5IndexFlush(Fts5Index *p){
4728   /* Unless it is empty, flush the hash table to disk */
4729   if( p->nPendingData ){
4730     assert( p->pHash );
4731     p->nPendingData = 0;
4732     fts5FlushOneHash(p);
4733   }
4734 }
4735 
/*
** Return a structure object that may be used to merge all segments of
** pStruct into a single segment. The return value is one of:
**
**   * NULL, if pStruct contains fewer than two segments, or if an
**     allocation fails (in which case the error is left in p->rc);
**
**   * pStruct itself, with its ref-count incremented, if all of its
**     segments are already on one level, or if all segments except one are
**     on one level and all of those are inputs to an ongoing merge;
**
**   * otherwise, a new structure with one more level than pStruct. All
**     levels are empty apart from the final one, which holds a copy of
**     every segment in pStruct, ordered from oldest to newest.
*/
static Fts5Structure *fts5IndexOptimizeStruct(
  Fts5Index *p,
  Fts5Structure *pStruct
){
  const int nTotal = pStruct->nSegment;     /* Total number of segments */
  const int nOldLevel = pStruct->nLevel;    /* Number of levels in pStruct */
  sqlite3_int64 nAlloc;                     /* Size of allocation in bytes */
  Fts5Structure *pRet;                      /* New structure object */
  Fts5StructureLevel *pDest;                /* Final level of pRet */
  int iLvl;
  int nCopied = 0;                          /* Segments copied so far */

  if( nTotal<2 ) return 0;
  for(iLvl=0; iLvl<nOldLevel; iLvl++){
    const int nHere = pStruct->aLevel[iLvl].nSeg;
    const int bAll = (nHere==nTotal);
    const int bAllButOneMerging = (
        nHere==nTotal-1 && pStruct->aLevel[iLvl].nMerge==nHere
    );
    if( bAll || bAllButOneMerging ){
      fts5StructureRef(pStruct);
      return pStruct;
    }
    assert( pStruct->aLevel[iLvl].nMerge<=nHere );
  }

  nAlloc = sizeof(Fts5Structure);
  nAlloc += (nOldLevel+1) * sizeof(Fts5StructureLevel);
  pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nAlloc);
  if( pRet==0 ) return 0;

  pRet->nLevel = nOldLevel+1;
  pRet->nRef = 1;
  pRet->nWriteCounter = pStruct->nWriteCounter;

  nAlloc = nTotal * sizeof(Fts5StructureSegment);
  pDest = &pRet->aLevel[nOldLevel];
  pDest->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nAlloc);
  if( pDest->aSeg==0 ){
    sqlite3_free(pRet);
    return 0;
  }

  /* Visit the segments from oldest to newest, so that pDest->aSeg[0] ends
  ** up as the oldest segment in the data structure. */
  for(iLvl=nOldLevel-1; iLvl>=0; iLvl--){
    const Fts5StructureLevel *pSrc = &pStruct->aLevel[iLvl];
    int iSeg;
    for(iSeg=0; iSeg<pSrc->nSeg; iSeg++){
      pDest->aSeg[nCopied++] = pSrc->aSeg[iSeg];
    }
  }
  pDest->nSeg = nTotal;
  pRet->nSegment = nTotal;

  return pRet;
}
4797 
/*
** Flush any pending data to disk, then obtain a structure from
** fts5IndexOptimizeStruct(). If one is returned, repeatedly merge its
** first non-empty level, in steps of FTS5_OPT_WORK_UNIT leaf pages, until
** that level is empty or an error occurs. Then write the structure back
** to the database.
**
** Return the resulting error code, resetting Fts5Index.rc to SQLITE_OK.
*/
int sqlite3Fts5IndexOptimize(Fts5Index *p){
  Fts5Structure *pCur;            /* Structure as read from the index */
  Fts5Structure *pOpt;            /* Structure used for the merge, if any */
  int iLvl = 0;

  assert( p->rc==SQLITE_OK );
  fts5IndexFlush(p);
  pCur = fts5StructureRead(p);
  fts5StructureInvalidate(p);

  pOpt = pCur ? fts5IndexOptimizeStruct(p, pCur) : 0;
  fts5StructureRelease(pCur);

  assert( pOpt==0 || pOpt->nSegment>0 );
  if( pOpt==0 ) return fts5IndexReturn(p);

  /* Find the first level that contains any segments, then merge it */
  while( pOpt->aLevel[iLvl].nSeg==0 ) iLvl++;
  while( p->rc==SQLITE_OK && pOpt->aLevel[iLvl].nSeg>0 ){
    int nRem = FTS5_OPT_WORK_UNIT;
    fts5IndexMergeLevel(p, &pOpt, iLvl, &nRem);
  }

  fts5StructureWrite(p, pOpt);
  fts5StructureRelease(pOpt);
  return fts5IndexReturn(p);
}
4827 
4828 /*
4829 ** This is called to implement the special "VALUES('merge', $nMerge)"
4830 ** INSERT command.
4831 */
sqlite3Fts5IndexMerge(Fts5Index * p,int nMerge)4832 int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
4833   Fts5Structure *pStruct = fts5StructureRead(p);
4834   if( pStruct ){
4835     int nMin = p->pConfig->nUsermerge;
4836     fts5StructureInvalidate(p);
4837     if( nMerge<0 ){
4838       Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
4839       fts5StructureRelease(pStruct);
4840       pStruct = pNew;
4841       nMin = 2;
4842       nMerge = nMerge*-1;
4843     }
4844     if( pStruct && pStruct->nLevel ){
4845       if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
4846         fts5StructureWrite(p, pStruct);
4847       }
4848     }
4849     fts5StructureRelease(pStruct);
4850   }
4851   return fts5IndexReturn(p);
4852 }
4853 
/*
** Append iDelta to buffer pBuf as a varint. The iterator argument is not
** used. If an error occurs, it is stored in Fts5Index.rc.
*/
static void fts5AppendRowid(
  Fts5Index *p,                   /* FTS5 backend object */
  u64 iDelta,                     /* Value to append */
  Fts5Iter *pUnused,              /* Not used */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
  UNUSED_PARAM(pUnused);
}
4863 
/*
** Append a doclist entry to buffer pBuf. The entry consists of varint
** iDelta, a varint size field set to twice the size of the current
** position list of iterator pMulti, and then the position list itself.
** FTS5_DATA_ZERO_PADDING zero bytes are written following the entry, but
** are not included in the buffer size.
**
** This is a no-op if an error has already occurred. If the buffer cannot
** be grown, nothing is appended.
*/
static void fts5AppendPoslist(
  Fts5Index *p,                   /* FTS5 backend object */
  u64 iDelta,                     /* Rowid delta to append */
  Fts5Iter *pMulti,               /* Iterator supplying the position list */
  Fts5Buffer *pBuf                /* Buffer to append to */
){
  const int nPoslist = pMulti->base.nData;
  /* Space for two varints of up to 9 bytes each, the data and padding */
  const int nReq = nPoslist + 9 + 9 + FTS5_DATA_ZERO_PADDING;

  assert( nPoslist>0 );
  if( p->rc!=SQLITE_OK ) return;
  if( fts5BufferGrow(&p->rc, pBuf, nReq) ) return;

  fts5BufferSafeAppendVarint(pBuf, iDelta);
  fts5BufferSafeAppendVarint(pBuf, nPoslist*2);
  fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nPoslist);
  memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
}
4880 
4881 
/*
** Advance doclist iterator pIter to its next entry.
**
** The next entry starts immediately after the current position list (at
** aPoslist + nSize + nPoslist). If that is at or past aEof, the iterator
** is at EOF and aPoslist is set to NULL. Otherwise, the rowid delta and
** the position-list size field are decoded, leaving:
**
**   iRowid:   previous rowid plus the delta,
**   nSize:    size in bytes of the position-list size field,
**   nPoslist: size in bytes of the position list,
**   aPoslist: pointer to the size field.
**
** If the position list would extend past aEof, aPoslist is set to NULL.
**
** The delta is added using unsigned arithmetic. Deltas are produced by
** unsigned subtraction (see fts5MergeAppendDocid), so adding one as a
** signed value could overflow even for a well-formed doclist.
*/
static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
  u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;

  assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
  if( p>=pIter->aEof ){
    pIter->aPoslist = 0;
  }else{
    u64 iDelta;

    p += fts5GetVarint(p, &iDelta);
    pIter->iRowid = (i64)((u64)pIter->iRowid + iDelta);

    /* Read position list size */
    if( p[0] & 0x80 ){
      int nPos;
      pIter->nSize = fts5GetVarint32(p, nPos);
      pIter->nPoslist = (nPos>>1);
    }else{
      /* Single byte size field - decode it inline */
      pIter->nPoslist = ((int)(p[0])) >> 1;
      pIter->nSize = 1;
    }

    pIter->aPoslist = p;
    if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
      pIter->aPoslist = 0;
    }
  }
}
4910 
/*
** Initialize iterator pIter to iterate through the doclist in buffer
** pBuf, leaving it pointing at the first entry. If the buffer is empty,
** the iterator is left zeroed, with aPoslist set to NULL.
*/
static void fts5DoclistIterInit(
  Fts5Buffer *pBuf,               /* Buffer containing doclist */
  Fts5DoclistIter *pIter          /* Iterator to initialize */
){
  memset(pIter, 0, sizeof(*pIter));
  if( pBuf->n<=0 ) return;

  pIter->aEof = &pBuf->p[pBuf->n];
  pIter->aPoslist = pBuf->p;
  fts5DoclistIterNext(pIter);
}
4922 
#if 0
/*
** Append a doclist to buffer pBuf.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** Macro version of the function above. Append rowid iRowid to the
** delta-encoded doclist in buffer pBuf, then set lvalue iLastRowid to
** iRowid. The delta is computed using unsigned arithmetic so that it
** cannot overflow. Space within the buffer must already have been
** allocated.
**
** The body is wrapped in do{}while(0) so that an invocation followed by a
** semicolon is a single statement, and is therefore safe to use as the
** body of an unbraced if/else. Argument iRowid is evaluated twice.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) do {              \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                             \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  (iLastRowid) = (iRowid);                                               \
} while( 0 )
4946 
4947 /*
4948 ** Swap the contents of buffer *p1 with that of *p2.
4949 */
fts5BufferSwap(Fts5Buffer * p1,Fts5Buffer * p2)4950 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
4951   Fts5Buffer tmp = *p1;
4952   *p1 = *p2;
4953   *p2 = tmp;
4954 }
4955 
/*
** Buffer pBuf contains a list of varint rowid deltas. *piOff is the
** offset of the next varint to read.
**
** If *piOff is at or past the end of the buffer, set *piOff to -1 and
** leave *piRowid unchanged. Otherwise, read a varint from the buffer, add
** its value to *piRowid and advance *piOff past it.
*/
static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
  const int iStart = *piOff;
  if( iStart<pBuf->n ){
    u64 iDelta;
    *piOff = iStart + sqlite3Fts5GetVarint(&pBuf->p[iStart], &iDelta);
    *piRowid += iDelta;
  }else{
    *piOff = -1;
  }
}
4966 
4967 /*
4968 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
4969 ** In this case the buffers consist of a delta-encoded list of rowids only.
4970 */
fts5MergeRowidLists(Fts5Index * p,Fts5Buffer * p1,int nBuf,Fts5Buffer * aBuf)4971 static void fts5MergeRowidLists(
4972   Fts5Index *p,                   /* FTS5 backend object */
4973   Fts5Buffer *p1,                 /* First list to merge */
4974   int nBuf,                       /* Number of entries in apBuf[] */
4975   Fts5Buffer *aBuf                /* Array of other lists to merge into p1 */
4976 ){
4977   int i1 = 0;
4978   int i2 = 0;
4979   i64 iRowid1 = 0;
4980   i64 iRowid2 = 0;
4981   i64 iOut = 0;
4982   Fts5Buffer *p2 = &aBuf[0];
4983   Fts5Buffer out;
4984 
4985   (void)nBuf;
4986   memset(&out, 0, sizeof(out));
4987   assert( nBuf==1 );
4988   sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
4989   if( p->rc ) return;
4990 
4991   fts5NextRowid(p1, &i1, &iRowid1);
4992   fts5NextRowid(p2, &i2, &iRowid2);
4993   while( i1>=0 || i2>=0 ){
4994     if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
4995       assert( iOut==0 || iRowid1>iOut );
4996       fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
4997       iOut = iRowid1;
4998       fts5NextRowid(p1, &i1, &iRowid1);
4999     }else{
5000       assert( iOut==0 || iRowid2>iOut );
5001       fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
5002       iOut = iRowid2;
5003       if( i1>=0 && iRowid1==iRowid2 ){
5004         fts5NextRowid(p1, &i1, &iRowid1);
5005       }
5006       fts5NextRowid(p2, &i2, &iRowid2);
5007     }
5008   }
5009 
5010   fts5BufferSwap(&out, p1);
5011   fts5BufferFree(&out);
5012 }
5013 
5014 typedef struct PrefixMerger PrefixMerger;
5015 struct PrefixMerger {
5016   Fts5DoclistIter iter;           /* Doclist iterator */
5017   i64 iPos;                       /* For iterating through a position list */
5018   int iOff;
5019   u8 *aPos;
5020   PrefixMerger *pNext;            /* Next in docid/poslist order */
5021 };
5022 
/*
** Insert merger p into the linked list at *ppHead, which is sorted in
** ascending order of iter.iRowid. The new entry is inserted before the
** first existing entry with a rowid greater than or equal to its own.
**
** This is a no-op if p->iter.aPoslist is NULL (the iterator is at EOF).
*/
static void fts5PrefixMergerInsertByRowid(
  PrefixMerger **ppHead,
  PrefixMerger *p
){
  PrefixMerger **ppLink;          /* Link to write pointer to p into */

  if( p->iter.aPoslist==0 ) return;

  for(ppLink=ppHead;
      *ppLink && p->iter.iRowid>(*ppLink)->iter.iRowid;
      ppLink=&(*ppLink)->pNext
  ){}
  p->pNext = *ppLink;
  *ppLink = p;
}
5036 
/*
** Insert merger p into the linked list at *ppHead, which is sorted in
** ascending order of iPos. The new entry is inserted before the first
** existing entry with a position greater than or equal to its own.
**
** This is a no-op if p->iPos is negative.
*/
static void fts5PrefixMergerInsertByPosition(
  PrefixMerger **ppHead,
  PrefixMerger *p
){
  PrefixMerger **ppLink;          /* Link to write pointer to p into */

  if( p->iPos<0 ) return;

  for(ppLink=ppHead;
      *ppLink && p->iPos>(*ppLink)->iPos;
      ppLink=&(*ppLink)->pNext
  ){}
  p->pNext = *ppLink;
  *ppLink = p;
}
5050 
5051 
5052 /*
5053 ** Array aBuf[] contains nBuf doclists. These are all merged in with the
5054 ** doclist in buffer p1.
5055 */
/*
** Merge the doclist stored in buffer *p1 with the nBuf doclists stored in
** aBuf[0..nBuf-1], and leave the merged doclist in *p1.
**
** One PrefixMerger is used per input; they live in the fixed-size array
** aMerger[], so nBuf+1 must not exceed FTS5_MERGE_NLIST (asserted below).
** The iterator for p1 occupies slot aMerger[nBuf].
**
** Outcomes visible in this function:
**
**   * If the aBuf[] buffers are all empty (their sizes sum to zero), or if
**     the output buffer cannot be sized, the function returns early and
**     *p1 is left exactly as it was.
**
**   * Otherwise the old contents of *p1 are freed and replaced by the
**     output buffer. This also happens when the main loop is abandoned
**     early because p->rc was set (FTS5_CORRUPT, or an error reported by
**     sqlite3Fts5BufferSize()); in that case *p1 holds whatever had been
**     written up to that point and the caller must consult p->rc.
**
** The two helper macros defined at the top of the body are not #undef'd in
** this function; FTS5_MERGE_NLIST is also used by fts5SetupPrefixIter().
*/
fts5MergePrefixLists(Fts5Index * p,Fts5Buffer * p1,int nBuf,Fts5Buffer * aBuf)5056 static void fts5MergePrefixLists(
5057   Fts5Index *p,                   /* FTS5 backend object */
5058   Fts5Buffer *p1,                 /* First list to merge */
5059   int nBuf,                       /* Number of buffers in array aBuf[] */
5060   Fts5Buffer *aBuf                /* Other lists to merge in */
5061 ){
5062 #define fts5PrefixMergerNextPosition(p) \
5063   sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
5064 #define FTS5_MERGE_NLIST 16
5065   PrefixMerger aMerger[FTS5_MERGE_NLIST];
5066   PrefixMerger *pHead = 0;
5067   int i;
5068   int nOut = 0;
5069   Fts5Buffer out = {0, 0, 0};
5070   Fts5Buffer tmp = {0, 0, 0};
5071   i64 iLastRowid = 0;
5072
5073   /* Initialize a doclist-iterator for each input buffer. Arrange them in
5074   ** a linked-list starting at pHead in ascending order of rowid. Avoid
5075   ** linking any iterators already at EOF into the linked list at all. */
5076   assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) );
5077   memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
5078   pHead = &aMerger[nBuf];
5079   fts5DoclistIterInit(p1, &pHead->iter);
5080   for(i=0; i<nBuf; i++){
5081     fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
5082     fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
5083     nOut += aBuf[i].n;
5084   }
  /* At this point nOut is the total size of the aBuf[] inputs only. If it
  ** is zero there is nothing to merge into p1, so p1 is left unchanged. */
5085   if( nOut==0 ) return;
5086   nOut += p1->n + 9 + 10*nBuf;
5087
5088   /* The maximum size of the output is equal to the sum of the
5089   ** input sizes + 1 varint (9 bytes). The extra varint is because if the
5090   ** first rowid in one input is a large negative number, and the first in
5091   ** the other a non-negative number, the delta for the non-negative
5092   ** number will be larger on disk than the literal integer value
5093   ** was.
5094   **
5095   ** Or, if the input position-lists are corrupt, then the output might
5096   ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
5097   ** (the value PoslistNext64() uses for EOF) as a position and appending
5098   ** it to the output. This can happen at most once for each input
5099   ** position-list, hence (nBuf+1) 10 byte paddings.  */
5100   if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;
5101
  /* Main merge loop. Each pass emits one rowid: pHead is the list entry
  ** with the smallest rowid. */
5102   while( pHead ){
    /* NOTE(review): fts5MergeAppendDocid() is defined elsewhere. The
    ** comparison against iLastRowid just below only makes sense if it
    ** also advances iLastRowid to pHead's rowid - confirm. */
5103     fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);
5104
5105     if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
5106       /* Merge data from two or more poslists */
5107       i64 iPrev = 0;
5108       int nTmp = FTS5_DATA_ZERO_PADDING;
5109       int nMerge = 0;
5110       PrefixMerger *pSave = pHead;
5111       PrefixMerger *pThis = 0;
5112       int nTail = 0;
5113
      /* Detach every merger positioned on this rowid from the rowid-ordered
      ** list and re-link it into a new list (again headed by pHead) that is
      ** ordered by current position. pSave ends up pointing at the
      ** remainder of the rowid-ordered list. */
5114       pHead = 0;
5115       while( pSave && pSave->iter.iRowid==iLastRowid ){
5116         PrefixMerger *pNext = pSave->pNext;
5117         pSave->iOff = 0;
5118         pSave->iPos = 0;
5119         pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
5120         fts5PrefixMergerNextPosition(pSave);
5121         nTmp += pSave->iter.nPoslist + 10;
5122         nMerge++;
5123         fts5PrefixMergerInsertByPosition(&pHead, pSave);
5124         pSave = pNext;
5125       }
5126
      /* Fewer than two mergers made it into the position-ordered list even
      ** though at least two shared this rowid: treat the input as corrupt.
      ** NOTE(review): presumably InsertByPosition() skips mergers whose
      ** poslist is already exhausted - confirm. */
5127       if( pHead==0 || pHead->pNext==0 ){
5128         p->rc = FTS5_CORRUPT;
5129         break;
5130       }
5131
5132       /* See the earlier comment in this function for an explanation of why
5133       ** corrupt input position lists might cause the output to consume
5134       ** at most nMerge*10 bytes of unexpected space. */
5135       if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
5136         break;
5137       }
5138       fts5BufferZero(&tmp);
5139
      /* The first position is appended unconditionally; after that a
      ** position is appended only if it differs from iPrev.
      ** NOTE(review): iPrev is passed by address to
      ** sqlite3Fts5PoslistSafeAppend(), which presumably updates it to the
      ** position just written - confirm. */
5140       pThis = pHead;
5141       pHead = pThis->pNext;
5142       sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5143       fts5PrefixMergerNextPosition(pThis);
5144       fts5PrefixMergerInsertByPosition(&pHead, pThis);
5145
      /* Keep merging while at least two mergers remain in the list. */
5146       while( pHead->pNext ){
5147         pThis = pHead;
5148         if( pThis->iPos!=iPrev ){
5149           sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
5150         }
5151         fts5PrefixMergerNextPosition(pThis);
5152         pHead = pThis->pNext;
5153         fts5PrefixMergerInsertByPosition(&pHead, pThis);
5154       }
5155
      /* Exactly one merger is left. Emit its current position (unless it
      ** duplicates iPrev); the nTail bytes that follow its read offset are
      ** copied to the output verbatim further down. */
5156       if( pHead->iPos!=iPrev ){
5157         sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
5158       }
5159       nTail = pHead->iter.nPoslist - pHead->iOff;
5160
5161       /* WRITEPOSLISTSIZE */
5162       assert_nc( tmp.n+nTail<=nTmp );
5163       assert( tmp.n+nTail<=nTmp+nMerge*10 );
5164       if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
5165         if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
5166         break;
5167       }
      /* The size field written here is twice the poslist byte count.
      ** NOTE(review): the doubling presumably leaves the low bit free for
      ** a flag used by the on-disk format - confirm against the reader. */
5168       fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
5169       fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
5170       if( nTail>0 ){
5171         fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
5172       }
5173
      /* Restore the rowid-ordered list, then advance every merger (the one
      ** for p1 at aMerger[nBuf] included) that was positioned on this rowid
      ** and link it back in by its new rowid. */
5174       pHead = pSave;
5175       for(i=0; i<nBuf+1; i++){
5176         PrefixMerger *pX = &aMerger[i];
5177         if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
5178           fts5DoclistIterNext(&pX->iter);
5179           fts5PrefixMergerInsertByRowid(&pHead, pX);
5180         }
5181       }
5182
5183     }else{
5184       /* Copy poslist from pHead to output */
5185       PrefixMerger *pThis = pHead;
5186       Fts5DoclistIter *pI = &pThis->iter;
5187       fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
5188       fts5DoclistIterNext(pI);
5189       pHead = pThis->pNext;
5190       fts5PrefixMergerInsertByRowid(&pHead, pThis);
5191     }
5192   }
5193
  /* Hand the output buffer over to the caller in place of the old *p1.
  ** This point is also reached after a "break" above, i.e. with p->rc set. */
5194   fts5BufferFree(p1);
5195   fts5BufferFree(&tmp);
5196   memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
5197   *p1 = out;
5198 }
5199 
/*
** Build a single in-memory doclist covering every index term that begins
** with the nToken-byte key in pToken[], then wrap it in an iterator that is
** returned via *ppIter (created by fts5MultiIterNew2() with the bDesc flag).
**
** pToken[0] is the index-selector byte of the key. It is overwritten by
** this function, which is why pToken is not const.
**
** If iIdx is non-zero the doclist is seeded first from a query against the
** main index (selector byte FTS5_MAIN_PREFIX) that does not use the SCAN
** flag; the scan proper then runs against index FTS5_MAIN_PREFIX+iIdx.
**
** Entries are appended to "doclist" for as long as rowids keep increasing.
** When a rowid that is not larger than the previous one arrives, the
** accumulated doclist is parked in aBuf[], which is organised as levels of
** nMerge slots each: the doclist goes into the first empty slot of level 0;
** if the level is full, the whole level is merged into the doclist (xMerge)
** and the result is carried to the next level.
**
** *ppIter is only written if both allocations and the structure read
** succeed; errors are reported through p->rc.
*/
fts5SetupPrefixIter(Fts5Index * p,int bDesc,int iIdx,u8 * pToken,int nToken,Fts5Colset * pColset,Fts5Iter ** ppIter)5200 static void fts5SetupPrefixIter(
5201   Fts5Index *p,                   /* Index to read from */
5202   int bDesc,                      /* True for "ORDER BY rowid DESC" */
5203   int iIdx,                       /* Index to scan for data */
5204   u8 *pToken,                     /* Buffer containing prefix to match */
5205   int nToken,                     /* Size of buffer pToken in bytes */
5206   Fts5Colset *pColset,            /* Restrict matches to these columns */
5207   Fts5Iter **ppIter          /* OUT: New iterator */
5208 ){
5209   Fts5Structure *pStruct;
5210   Fts5Buffer *aBuf;
5211   int nBuf = 32;
5212   int nMerge = 1;
5213
  /* Select the merge/append implementations. With detail=none the lists
  ** hold rowids only and are merged one slot at a time (nMerge==1, 32
  ** levels); otherwise up to FTS5_MERGE_NLIST-1 position-list doclists are
  ** merged per call. */
5214   void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
5215   void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
5216   if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
5217     xMerge = fts5MergeRowidLists;
5218     xAppend = fts5AppendRowid;
5219   }else{
5220     nMerge = FTS5_MERGE_NLIST-1;
5221     nBuf = nMerge*8;   /* Sufficient to merge (16^8)==(2^32) lists */
5222     xMerge = fts5MergePrefixLists;
5223     xAppend = fts5AppendPoslist;
5224   }
5225
5226   aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
5227   pStruct = fts5StructureRead(p);
5228
5229   if( aBuf && pStruct ){
5230     const int flags = FTS5INDEX_QUERY_SCAN
5231                     | FTS5INDEX_QUERY_SKIPEMPTY
5232                     | FTS5INDEX_QUERY_NOOUTPUT;
5233     int i;
5234     i64 iLastRowid = 0;
5235     Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
5236     Fts5Data *pData;
5237     Fts5Buffer doclist;
5238     int bNewTerm = 1;
5239
5240     memset(&doclist, 0, sizeof(doclist));
    /* Optional first pass: seed the doclist from the main index. No term
    ** check is made inside this loop; the query is opened without the SCAN
    ** flag. The rowid delta passed to xAppend is computed in unsigned
    ** arithmetic. */
5241     if( iIdx!=0 ){
5242       int dummy = 0;
5243       const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
5244       pToken[0] = FTS5_MAIN_PREFIX;
5245       fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
5246       fts5IterSetOutputCb(&p->rc, p1);
5247       for(;
5248         fts5MultiIterEof(p, p1)==0;
5249         fts5MultiIterNext2(p, p1, &dummy)
5250       ){
5251         Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
5252         p1->xSetOutputs(p1, pSeg);
5253         if( p1->base.nData ){
5254           xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
5255           iLastRowid = p1->base.iRowid;
5256         }
5257       }
5258       fts5MultiIterFree(p1);
5259     }
5260
    /* Main pass: scan index iIdx starting at the key. Whenever the iterator
    ** reports a new term (bNewTerm) the scan stops if that term no longer
    ** begins with the nToken bytes of pToken[]. */
5261     pToken[0] = FTS5_MAIN_PREFIX + iIdx;
5262     fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
5263     fts5IterSetOutputCb(&p->rc, p1);
5264     for( /* no-op */ ;
5265         fts5MultiIterEof(p, p1)==0;
5266         fts5MultiIterNext2(p, p1, &bNewTerm)
5267     ){
5268       Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
5269       int nTerm = pSeg->term.n;
5270       const u8 *pTerm = pSeg->term.p;
5271       p1->xSetOutputs(p1, pSeg);
5272
5273       assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
5274       if( bNewTerm ){
5275         if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
5276       }
5277
5278       if( p1->base.nData==0 ) continue;
5279
      /* Rowids stopped increasing: park the accumulated doclist in aBuf[]
      ** before starting a fresh one. Level i occupies slots
      ** [i*nMerge, (i+1)*nMerge). The loop ends as soon as the doclist has
      ** been swapped into an empty slot (doclist.n becomes 0); if a level is
      ** full it is merged into the doclist and the loop carries on to the
      ** next level. */
5280       if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
5281         for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
5282           int i1 = i*nMerge;
5283           int iStore;
5284           assert( i1+nMerge<=nBuf );
5285           for(iStore=i1; iStore<i1+nMerge; iStore++){
5286             if( aBuf[iStore].n==0 ){
5287               fts5BufferSwap(&doclist, &aBuf[iStore]);
5288               fts5BufferZero(&doclist);
5289               break;
5290             }
5291           }
5292           if( iStore==i1+nMerge ){
5293             xMerge(p, &doclist, nMerge, &aBuf[i1]);
5294             for(iStore=i1; iStore<i1+nMerge; iStore++){
5295               fts5BufferZero(&aBuf[iStore]);
5296             }
5297           }
5298         }
        /* The new doclist starts from scratch, so the next delta is taken
        ** relative to rowid 0. */
5299         iLastRowid = 0;
5300       }
5301
5302       xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
5303       iLastRowid = p1->base.iRowid;
5304     }
5305
    /* Fold every level of aBuf[] into the final doclist (skipped once an
    ** error has been recorded) and release the slot buffers regardless. */
5306     assert( (nBuf%nMerge)==0 );
5307     for(i=0; i<nBuf; i+=nMerge){
5308       int iFree;
5309       if( p->rc==SQLITE_OK ){
5310         xMerge(p, &doclist, nMerge, &aBuf[i]);
5311       }
5312       for(iFree=i; iFree<i+nMerge; iFree++){
5313         fts5BufferFree(&aBuf[iFree]);
5314       }
5315     }
5316     fts5MultiIterFree(p1);
5317
    /* Copy the doclist into a Fts5Data record whose payload directly
    ** follows the header in the same allocation, with room for
    ** FTS5_DATA_ZERO_PADDING extra bytes, and hand it to the iterator
    ** constructor.
    ** NOTE(review): ownership of pData presumably passes to the new
    ** iterator, since it is not released here - confirm. */
5318     pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
5319     if( pData ){
5320       pData->p = (u8*)&pData[1];
5321       pData->nn = pData->szLeaf = doclist.n;
5322       if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
5323       fts5MultiIterNew2(p, pData, bDesc, ppIter);
5324     }
5325     fts5BufferFree(&doclist);
5326   }
5327
5328   fts5StructureRelease(pStruct);
5329   sqlite3_free(aBuf);
5330 }
5331 
5332 
5333 /*
5334 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
5335 ** to the document with rowid iRowid.
5336 */
sqlite3Fts5IndexBeginWrite(Fts5Index * p,int bDelete,i64 iRowid)5337 int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
5338   assert( p->rc==SQLITE_OK );
5339 
5340   /* Allocate the hash table if it has not already been allocated */
5341   if( p->pHash==0 ){
5342     p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
5343   }
5344 
5345   /* Flush the hash table to disk if required */
5346   if( iRowid<p->iWriteRowid
5347    || (iRowid==p->iWriteRowid && p->bDelete==0)
5348    || (p->nPendingData > p->pConfig->nHashSize)
5349   ){
5350     fts5IndexFlush(p);
5351   }
5352 
5353   p->iWriteRowid = iRowid;
5354   p->bDelete = bDelete;
5355   return fts5IndexReturn(p);
5356 }
5357 
5358 /*
5359 ** Commit data to disk.
5360 */
sqlite3Fts5IndexSync(Fts5Index * p)5361 int sqlite3Fts5IndexSync(Fts5Index *p){
5362   assert( p->rc==SQLITE_OK );
5363   fts5IndexFlush(p);
5364   sqlite3Fts5IndexCloseReader(p);
5365   return fts5IndexReturn(p);
5366 }
5367 
5368 /*
5369 ** Discard any data stored in the in-memory hash tables. Do not write it
5370 ** to the database. Additionally, assume that the contents of the %_data
5371 ** table may have changed on disk. So any in-memory caches of %_data
5372 ** records must be invalidated.
5373 */
sqlite3Fts5IndexRollback(Fts5Index * p)5374 int sqlite3Fts5IndexRollback(Fts5Index *p){
5375   sqlite3Fts5IndexCloseReader(p);
5376   fts5IndexDiscardData(p);
5377   fts5StructureInvalidate(p);
5378   /* assert( p->rc==SQLITE_OK ); */
5379   return SQLITE_OK;
5380 }
5381 
5382 /*
5383 ** The %_data table is completely empty when this function is called. This
5384 ** function populates it with the initial structure objects for each index,
5385 ** and the initial version of the "averages" record (a zero-byte blob).
5386 */
sqlite3Fts5IndexReinit(Fts5Index * p)5387 int sqlite3Fts5IndexReinit(Fts5Index *p){
5388   Fts5Structure s;
5389   fts5StructureInvalidate(p);
5390   fts5IndexDiscardData(p);
5391   memset(&s, 0, sizeof(Fts5Structure));
5392   fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
5393   fts5StructureWrite(p, &s);
5394   return fts5IndexReturn(p);
5395 }
5396 
5397 /*
5398 ** Open a new Fts5Index handle. If the bCreate argument is true, create
5399 ** and initialize the underlying %_data table.
5400 **
5401 ** If successful, set *pp to point to the new object and return SQLITE_OK.
5402 ** Otherwise, set *pp to NULL and return an SQLite error code.
5403 */
sqlite3Fts5IndexOpen(Fts5Config * pConfig,int bCreate,Fts5Index ** pp,char ** pzErr)5404 int sqlite3Fts5IndexOpen(
5405   Fts5Config *pConfig,
5406   int bCreate,
5407   Fts5Index **pp,
5408   char **pzErr
5409 ){
5410   int rc = SQLITE_OK;
5411   Fts5Index *p;                   /* New object */
5412 
5413   *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
5414   if( rc==SQLITE_OK ){
5415     p->pConfig = pConfig;
5416     p->nWorkUnit = FTS5_WORK_UNIT;
5417     p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
5418     if( p->zDataTbl && bCreate ){
5419       rc = sqlite3Fts5CreateTable(
5420           pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
5421       );
5422       if( rc==SQLITE_OK ){
5423         rc = sqlite3Fts5CreateTable(pConfig, "idx",
5424             "segid, term, pgno, PRIMARY KEY(segid, term)",
5425             1, pzErr
5426         );
5427       }
5428       if( rc==SQLITE_OK ){
5429         rc = sqlite3Fts5IndexReinit(p);
5430       }
5431     }
5432   }
5433 
5434   assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
5435   if( rc ){
5436     sqlite3Fts5IndexClose(p);
5437     *pp = 0;
5438   }
5439   return rc;
5440 }
5441 
5442 /*
5443 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
5444 */
sqlite3Fts5IndexClose(Fts5Index * p)5445 int sqlite3Fts5IndexClose(Fts5Index *p){
5446   int rc = SQLITE_OK;
5447   if( p ){
5448     assert( p->pReader==0 );
5449     fts5StructureInvalidate(p);
5450     sqlite3_finalize(p->pWriter);
5451     sqlite3_finalize(p->pDeleter);
5452     sqlite3_finalize(p->pIdxWriter);
5453     sqlite3_finalize(p->pIdxDeleter);
5454     sqlite3_finalize(p->pIdxSelect);
5455     sqlite3_finalize(p->pDataVersion);
5456     sqlite3Fts5HashFree(p->pHash);
5457     sqlite3_free(p->zDataTbl);
5458     sqlite3_free(p);
5459   }
5460   return rc;
5461 }
5462 
/*
** Buffer p holds nByte bytes of utf-8 text. Return the size in bytes of
** the prefix of the buffer that consists of its first nChar characters.
** Return 0 if the buffer holds fewer than nChar characters.
**
** A byte >= 0xC0 starts a multi-byte character, which extends over all
** 0x80..0xBF bytes that follow it. If such a lead byte is the very last
** byte of the buffer, 0 is returned.
*/
int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int iByte = 0;                  /* Offset of the next unread byte */
  int nSeen;                      /* Characters consumed so far */

  for(nSeen=0; nSeen<nChar; nSeen++){
    unsigned char c;

    if( iByte>=nByte ) return 0;  /* Ran out of input */
    c = (unsigned char)p[iByte];
    iByte++;
    if( c<0xc0 ) continue;        /* Single byte character */

    /* Multi-byte character: skip its continuation bytes. */
    if( iByte>=nByte ) return 0;
    while( (p[iByte] & 0xc0)==0x80 ){
      iByte++;
      if( iByte<nByte ) continue;

      /* Hit the end of the buffer. That is fine only if this was the last
      ** character asked for. */
      if( nSeen+1!=nChar ) return 0;
      break;
    }
  }
  return iByte;
}
5490 
/*
** Count the characters in the nIn byte utf-8 string pIn. A byte >= 0xC0
** together with all 0x80..0xBF bytes that immediately follow it counts as
** one character; any other byte counts as one character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int iByte;                      /* Offset of the next unread byte */
  int nChar;                      /* Characters counted so far */

  for(iByte=0, nChar=0; iByte<nIn; nChar++){
    unsigned char c = (unsigned char)pIn[iByte++];
    if( c>=0xc0 ){
      /* Swallow the continuation bytes of a multi-byte character */
      for(; iByte<nIn && (pIn[iByte] & 0xc0)==0x80; iByte++){}
    }
  }
  return nChar;
}
5506 
5507 /*
5508 ** Insert or remove data to or from the index. Each time a document is
5509 ** added to or removed from the index, this function is called one or more
5510 ** times.
5511 **
5512 ** For an insert, it must be called once for each token in the new document.
5513 ** If the operation is a delete, it must be called (at least) once for each
5514 ** unique token in the document with an iCol value less than zero. The iPos
5515 ** argument is ignored for a delete.
5516 */
sqlite3Fts5IndexWrite(Fts5Index * p,int iCol,int iPos,const char * pToken,int nToken)5517 int sqlite3Fts5IndexWrite(
5518   Fts5Index *p,                   /* Index to write to */
5519   int iCol,                       /* Column token appears in (-ve -> delete) */
5520   int iPos,                       /* Position of token within column */
5521   const char *pToken, int nToken  /* Token to add or remove to or from index */
5522 ){
5523   int i;                          /* Used to iterate through indexes */
5524   int rc = SQLITE_OK;             /* Return code */
5525   Fts5Config *pConfig = p->pConfig;
5526 
5527   assert( p->rc==SQLITE_OK );
5528   assert( (iCol<0)==p->bDelete );
5529 
5530   /* Add the entry to the main terms index. */
5531   rc = sqlite3Fts5HashWrite(
5532       p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
5533   );
5534 
5535   for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
5536     const int nChar = pConfig->aPrefix[i];
5537     int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
5538     if( nByte ){
5539       rc = sqlite3Fts5HashWrite(p->pHash,
5540           p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
5541           nByte
5542       );
5543     }
5544   }
5545 
5546   return rc;
5547 }
5548 
5549 /*
5550 ** Open a new iterator to iterate though all rowid that match the
5551 ** specified token or token prefix.
5552 */
sqlite3Fts5IndexQuery(Fts5Index * p,const char * pToken,int nToken,int flags,Fts5Colset * pColset,Fts5IndexIter ** ppIter)5553 int sqlite3Fts5IndexQuery(
5554   Fts5Index *p,                   /* FTS index to query */
5555   const char *pToken, int nToken, /* Token (or prefix) to query for */
5556   int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
5557   Fts5Colset *pColset,            /* Match these columns only */
5558   Fts5IndexIter **ppIter          /* OUT: New iterator object */
5559 ){
5560   Fts5Config *pConfig = p->pConfig;
5561   Fts5Iter *pRet = 0;
5562   Fts5Buffer buf = {0, 0, 0};
5563 
5564   /* If the QUERY_SCAN flag is set, all other flags must be clear. */
5565   assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
5566 
5567   if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
5568     int iIdx = 0;                 /* Index to search */
5569     int iPrefixIdx = 0;           /* +1 prefix index */
5570     if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
5571 
5572     /* Figure out which index to search and set iIdx accordingly. If this
5573     ** is a prefix query for which there is no prefix index, set iIdx to
5574     ** greater than pConfig->nPrefix to indicate that the query will be
5575     ** satisfied by scanning multiple terms in the main index.
5576     **
5577     ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
5578     ** prefix-query. Instead of using a prefix-index (if one exists),
5579     ** evaluate the prefix query using the main FTS index. This is used
5580     ** for internal sanity checking by the integrity-check in debug
5581     ** mode only.  */
5582 #ifdef SQLITE_DEBUG
5583     if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
5584       assert( flags & FTS5INDEX_QUERY_PREFIX );
5585       iIdx = 1+pConfig->nPrefix;
5586     }else
5587 #endif
5588     if( flags & FTS5INDEX_QUERY_PREFIX ){
5589       int nChar = fts5IndexCharlen(pToken, nToken);
5590       for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
5591         int nIdxChar = pConfig->aPrefix[iIdx-1];
5592         if( nIdxChar==nChar ) break;
5593         if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
5594       }
5595     }
5596 
5597     if( iIdx<=pConfig->nPrefix ){
5598       /* Straight index lookup */
5599       Fts5Structure *pStruct = fts5StructureRead(p);
5600       buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
5601       if( pStruct ){
5602         fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
5603             pColset, buf.p, nToken+1, -1, 0, &pRet
5604         );
5605         fts5StructureRelease(pStruct);
5606       }
5607     }else{
5608       /* Scan multiple terms in the main index */
5609       int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
5610       fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
5611       if( pRet==0 ){
5612         assert( p->rc!=SQLITE_OK );
5613       }else{
5614         assert( pRet->pColset==0 );
5615         fts5IterSetOutputCb(&p->rc, pRet);
5616         if( p->rc==SQLITE_OK ){
5617           Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
5618           if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
5619         }
5620       }
5621     }
5622 
5623     if( p->rc ){
5624       sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
5625       pRet = 0;
5626       sqlite3Fts5IndexCloseReader(p);
5627     }
5628 
5629     *ppIter = (Fts5IndexIter*)pRet;
5630     sqlite3Fts5BufferFree(&buf);
5631   }
5632   return fts5IndexReturn(p);
5633 }
5634 
5635 /*
5636 ** Return true if the iterator passed as the only argument is at EOF.
5637 */
5638 /*
5639 ** Move to the next matching rowid.
5640 */
sqlite3Fts5IterNext(Fts5IndexIter * pIndexIter)5641 int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
5642   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5643   assert( pIter->pIndex->rc==SQLITE_OK );
5644   fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
5645   return fts5IndexReturn(pIter->pIndex);
5646 }
5647 
5648 /*
5649 ** Move to the next matching term/rowid. Used by the fts5vocab module.
5650 */
sqlite3Fts5IterNextScan(Fts5IndexIter * pIndexIter)5651 int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
5652   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5653   Fts5Index *p = pIter->pIndex;
5654 
5655   assert( pIter->pIndex->rc==SQLITE_OK );
5656 
5657   fts5MultiIterNext(p, pIter, 0, 0);
5658   if( p->rc==SQLITE_OK ){
5659     Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
5660     if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
5661       fts5DataRelease(pSeg->pLeaf);
5662       pSeg->pLeaf = 0;
5663       pIter->base.bEof = 1;
5664     }
5665   }
5666 
5667   return fts5IndexReturn(pIter->pIndex);
5668 }
5669 
5670 /*
5671 ** Move to the next matching rowid that occurs at or after iMatch. The
5672 ** definition of "at or after" depends on whether this iterator iterates
5673 ** in ascending or descending rowid order.
5674 */
sqlite3Fts5IterNextFrom(Fts5IndexIter * pIndexIter,i64 iMatch)5675 int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
5676   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5677   fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
5678   return fts5IndexReturn(pIter->pIndex);
5679 }
5680 
5681 /*
5682 ** Return the current term.
5683 */
sqlite3Fts5IterTerm(Fts5IndexIter * pIndexIter,int * pn)5684 const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
5685   int n;
5686   const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
5687   assert_nc( z || n<=1 );
5688   *pn = n-1;
5689   return (z ? &z[1] : 0);
5690 }
5691 
5692 /*
5693 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
5694 */
sqlite3Fts5IterClose(Fts5IndexIter * pIndexIter)5695 void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
5696   if( pIndexIter ){
5697     Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
5698     Fts5Index *pIndex = pIter->pIndex;
5699     fts5MultiIterFree(pIter);
5700     sqlite3Fts5IndexCloseReader(pIndex);
5701   }
5702 }
5703 
5704 /*
5705 ** Read and decode the "averages" record from the database.
5706 **
5707 ** Parameter anSize must point to an array of size nCol, where nCol is
5708 ** the number of user defined columns in the FTS table.
5709 */
sqlite3Fts5IndexGetAverages(Fts5Index * p,i64 * pnRow,i64 * anSize)5710 int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
5711   int nCol = p->pConfig->nCol;
5712   Fts5Data *pData;
5713 
5714   *pnRow = 0;
5715   memset(anSize, 0, sizeof(i64) * nCol);
5716   pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
5717   if( p->rc==SQLITE_OK && pData->nn ){
5718     int i = 0;
5719     int iCol;
5720     i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
5721     for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
5722       i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
5723     }
5724   }
5725 
5726   fts5DataRelease(pData);
5727   return fts5IndexReturn(p);
5728 }
5729 
5730 /*
5731 ** Replace the current "averages" record with the contents of the buffer
5732 ** supplied as the second argument.
5733 */
sqlite3Fts5IndexSetAverages(Fts5Index * p,const u8 * pData,int nData)5734 int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
5735   assert( p->rc==SQLITE_OK );
5736   fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
5737   return fts5IndexReturn(p);
5738 }
5739 
5740 /*
5741 ** Return the total number of blocks this module has read from the %_data
5742 ** table since it was created.
5743 */
sqlite3Fts5IndexReads(Fts5Index * p)5744 int sqlite3Fts5IndexReads(Fts5Index *p){
5745   return p->nRead;
5746 }
5747 
5748 /*
5749 ** Set the 32-bit cookie value stored at the start of all structure
5750 ** records to the value passed as the second argument.
5751 **
5752 ** Return SQLITE_OK if successful, or an SQLite error code if an error
5753 ** occurs.
5754 */
sqlite3Fts5IndexSetCookie(Fts5Index * p,int iNew)5755 int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
5756   int rc;                              /* Return code */
5757   Fts5Config *pConfig = p->pConfig;    /* Configuration object */
5758   u8 aCookie[4];                       /* Binary representation of iNew */
5759   sqlite3_blob *pBlob = 0;
5760 
5761   assert( p->rc==SQLITE_OK );
5762   sqlite3Fts5Put32(aCookie, iNew);
5763 
5764   rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
5765       "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
5766   );
5767   if( rc==SQLITE_OK ){
5768     sqlite3_blob_write(pBlob, aCookie, 4, 0);
5769     rc = sqlite3_blob_close(pBlob);
5770   }
5771 
5772   return rc;
5773 }
5774 
/*
** Read the structure record and release it again straight away. The
** return value is whatever fts5IndexReturn() reports afterwards.
**
** NOTE(review): judging by the name, the point of this is a side effect
** of fts5StructureRead() on the configuration - confirm there.
*/
int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
  Fts5Structure *pStruct = fts5StructureRead(p);
  fts5StructureRelease(pStruct);
  return fts5IndexReturn(p);
}
5781 
5782 
5783 /*************************************************************************
5784 **************************************************************************
5785 ** Below this point is the implementation of the integrity-check
5786 ** functionality.
5787 */
5788 
5789 /*
5790 ** Return a simple checksum value based on the arguments.
5791 */
sqlite3Fts5IndexEntryCksum(i64 iRowid,int iCol,int iPos,int iIdx,const char * pTerm,int nTerm)5792 u64 sqlite3Fts5IndexEntryCksum(
5793   i64 iRowid,
5794   int iCol,
5795   int iPos,
5796   int iIdx,
5797   const char *pTerm,
5798   int nTerm
5799 ){
5800   int i;
5801   u64 ret = iRowid;
5802   ret += (ret<<3) + iCol;
5803   ret += (ret<<3) + iPos;
5804   if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
5805   for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
5806   return ret;
5807 }
5808 
5809 #ifdef SQLITE_DEBUG
5810 /*
5811 ** This function is purely an internal test. It does not contribute to
5812 ** FTS functionality, or even the integrity-check, in any way.
5813 **
5814 ** Instead, it tests that the same set of pgno/rowid combinations are
5815 ** visited regardless of whether the doclist-index identified by parameters
5816 ** iSegid/iLeaf is iterated in forwards or reverse order.
5817 */
fts5TestDlidxReverse(Fts5Index * p,int iSegid,int iLeaf)5818 static void fts5TestDlidxReverse(
5819   Fts5Index *p,
5820   int iSegid,                     /* Segment id to load from */
5821   int iLeaf                       /* Load doclist-index for this leaf */
5822 ){
5823   Fts5DlidxIter *pDlidx = 0;
5824   u64 cksum1 = 13;
5825   u64 cksum2 = 13;
5826 
5827   for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
5828       fts5DlidxIterEof(p, pDlidx)==0;
5829       fts5DlidxIterNext(p, pDlidx)
5830   ){
5831     i64 iRowid = fts5DlidxIterRowid(pDlidx);
5832     int pgno = fts5DlidxIterPgno(pDlidx);
5833     assert( pgno>iLeaf );
5834     cksum1 += iRowid + ((i64)pgno<<32);
5835   }
5836   fts5DlidxIterFree(pDlidx);
5837   pDlidx = 0;
5838 
5839   for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
5840       fts5DlidxIterEof(p, pDlidx)==0;
5841       fts5DlidxIterPrev(p, pDlidx)
5842   ){
5843     i64 iRowid = fts5DlidxIterRowid(pDlidx);
5844     int pgno = fts5DlidxIterPgno(pDlidx);
5845     assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
5846     cksum2 += iRowid + ((i64)pgno<<32);
5847   }
5848   fts5DlidxIterFree(pDlidx);
5849   pDlidx = 0;
5850 
5851   if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
5852 }
5853 
/*
** Run a query for key z/n with the given flags and XOR the checksum of
** every entry it returns into *pCksum. With detail=none there is one
** entry per rowid (column and offset taken as 0); otherwise there is one
** entry per position in each position list.
**
** *pCksum is updated even if an error occurs part way through. The return
** value is the first error encountered, or SQLITE_OK.
*/
static int fts5QueryCksum(
  Fts5Index *p,                   /* Fts5 index object */
  int iIdx,
  const char *z,                  /* Index key to query for */
  int n,                          /* Size of index key in bytes */
  int flags,                      /* Flags for Fts5IndexQuery */
  u64 *pCksum                     /* IN/OUT: Checksum value */
){
  const int bNoDetail = (p->pConfig->eDetail==FTS5_DETAIL_NONE);
  Fts5IndexIter *pIter = 0;
  u64 cksum = *pCksum;
  int rc;

  rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
  while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
    i64 rowid = pIter->iRowid;

    if( bNoDetail ){
      cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
    }else{
      Fts5PoslistReader sReader;
      sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
      while( sReader.bEof==0 ){
        int iCol = FTS5_POS2COLUMN(sReader.iPos);
        int iOff = FTS5_POS2OFFSET(sReader.iPos);
        cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
        sqlite3Fts5PoslistReaderNext(&sReader);
      }
    }

    /* rc is always SQLITE_OK here, courtesy of the loop condition. */
    rc = sqlite3Fts5IterNext(pIter);
  }
  sqlite3Fts5IterClose(pIter);

  *pCksum = cksum;
  return rc;
}
5892 
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** The check is structural only: each lead byte must be followed, within
** the buffer, by the number of continuation bytes (10xxxxxx) that it
** announces:
**
**       0xxxxxxx   0 continuation bytes
**       110xxxxx   1 continuation byte
**       1110xxxx   2 continuation bytes
**       11110xxx   3 continuation bytes
**
** Any other lead byte is invalid. Overlong encodings and out-of-range
** codepoints are not detected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    const unsigned char c = (unsigned char)z[i];
    int nTrail;                   /* Continuation bytes required after c */
    int j;

    if( (c & 0x80)==0x00 ){
      nTrail = 0;
    }else if( (c & 0xE0)==0xC0 ){
      nTrail = 1;
    }else if( (c & 0xF0)==0xE0 ){
      nTrail = 2;
    }else if( (c & 0xF8)==0xF0 ){
      nTrail = 3;
    }else{
      return 1;
    }

    /* All continuation bytes must lie inside the buffer and be of the
    ** form 10xxxxxx. For a 4-byte sequence that means checking z[i+1],
    ** z[i+2] and z[i+3], then stepping over all four bytes. */
    if( i+nTrail>=n ) return 1;
    for(j=1; j<=nTrail; j++){
      if( (z[i+j] & 0xC0)!=0x80 ) return 1;
    }
    i += nTrail+1;
  }

  return 0;
}
5924 
5925 /*
5926 ** This function is also purely an internal test. It does not contribute to
5927 ** FTS functionality, or even the integrity-check, in any way.
5928 */
fts5TestTerm(Fts5Index * p,Fts5Buffer * pPrev,const char * z,int n,u64 expected,u64 * pCksum)5929 static void fts5TestTerm(
5930   Fts5Index *p,
5931   Fts5Buffer *pPrev,              /* Previous term */
5932   const char *z, int n,           /* Possibly new term to test */
5933   u64 expected,
5934   u64 *pCksum
5935 ){
5936   int rc = p->rc;
5937   if( pPrev->n==0 ){
5938     fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5939   }else
5940   if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
5941     u64 cksum3 = *pCksum;
5942     const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
5943     int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
5944     int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
5945     int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
5946     u64 ck1 = 0;
5947     u64 ck2 = 0;
5948 
5949     /* Check that the results returned for ASC and DESC queries are
5950     ** the same. If not, call this corruption.  */
5951     rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
5952     if( rc==SQLITE_OK ){
5953       int f = flags|FTS5INDEX_QUERY_DESC;
5954       rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5955     }
5956     if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5957 
5958     /* If this is a prefix query, check that the results returned if the
5959     ** the index is disabled are the same. In both ASC and DESC order.
5960     **
5961     ** This check may only be performed if the hash table is empty. This
5962     ** is because the hash table only supports a single scan query at
5963     ** a time, and the multi-iter loop from which this function is called
5964     ** is already performing such a scan.
5965     **
5966     ** Also only do this if buffer zTerm contains nTerm bytes of valid
5967     ** utf-8. Otherwise, the last part of the buffer contents might contain
5968     ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
5969     ** character stored in the main fts index, which will cause the
5970     ** test to fail.  */
5971     if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
5972       if( iIdx>0 && rc==SQLITE_OK ){
5973         int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
5974         ck2 = 0;
5975         rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5976         if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5977       }
5978       if( iIdx>0 && rc==SQLITE_OK ){
5979         int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
5980         ck2 = 0;
5981         rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
5982         if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
5983       }
5984     }
5985 
5986     cksum3 ^= ck1;
5987     fts5BufferSet(&rc, pPrev, n, (const u8*)z);
5988 
5989     if( rc==SQLITE_OK && cksum3!=expected ){
5990       rc = FTS5_CORRUPT;
5991     }
5992     *pCksum = cksum3;
5993   }
5994   p->rc = rc;
5995 }
5996 
5997 #else
5998 # define fts5TestDlidxReverse(x,y,z)
5999 # define fts5TestTerm(u,v,w,x,y,z)
6000 #endif
6001 
6002 /*
6003 ** Check that:
6004 **
6005 **   1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
6006 **      contain zero terms.
6007 **   2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
6008 **      contain zero rowids.
6009 */
fts5IndexIntegrityCheckEmpty(Fts5Index * p,Fts5StructureSegment * pSeg,int iFirst,int iNoRowid,int iLast)6010 static void fts5IndexIntegrityCheckEmpty(
6011   Fts5Index *p,
6012   Fts5StructureSegment *pSeg,     /* Segment to check internal consistency */
6013   int iFirst,
6014   int iNoRowid,
6015   int iLast
6016 ){
6017   int i;
6018 
6019   /* Now check that the iter.nEmpty leaves following the current leaf
6020   ** (a) exist and (b) contain no terms. */
6021   for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
6022     Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
6023     if( pLeaf ){
6024       if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
6025       if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
6026     }
6027     fts5DataRelease(pLeaf);
6028   }
6029 }
6030 
/*
** Check the page-index of leaf page pLeaf, which is the list of varints
** stored between byte offsets pLeaf->szLeaf and pLeaf->nn. Each varint is
** added to a running total (iTermOff) to give the offset of a term within
** the page.
**
** For each such offset this function checks that:
**
**   * the offset is less than pLeaf->szLeaf,
**   * the term data found there (for the first term: a size followed by
**     the term bytes; for later terms: a number of bytes to keep from the
**     previous term, a size, and the new suffix bytes) does not extend
**     past pLeaf->szLeaf,
**   * the number of bytes to keep is no larger than the previous term, and
**   * each term after the first compares greater than the term before it.
**
** If any check fails p->rc is set to FTS5_CORRUPT. The loop stops at the
** first error.
**
** Offsets are accumulated in 64-bit integers. The values come straight
** from the (possibly corrupt) page, and adding them up in an "int" could
** overflow before the range checks below have a chance to reject them.
*/
static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
  i64 iTermOff = 0;               /* Sum of page-index varints so far */
  int ii;                         /* Read offset within the page-index */

  Fts5Buffer buf1 = {0,0,0};      /* Term currently being checked */
  Fts5Buffer buf2 = {0,0,0};      /* Copy of the previous term */

  ii = pLeaf->szLeaf;
  while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
    int res;
    i64 iOff;
    int nIncr;

    ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
    iTermOff += nIncr;
    iOff = iTermOff;

    if( iOff>=pLeaf->szLeaf ){
      p->rc = FTS5_CORRUPT;
    }else if( iTermOff==nIncr ){
      /* First term on the page: the whole term is stored. */
      int nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }
    }else{
      /* Subsequent term: keep nKeep bytes of the previous term and append
      ** nByte new bytes.  */
      int nKeep, nByte;
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
      iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
      if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        buf1.n = nKeep;
        fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
      }

      /* Terms must appear in strictly ascending order. */
      if( p->rc==SQLITE_OK ){
        res = fts5BufferCompare(&buf1, &buf2);
        if( res<=0 ) p->rc = FTS5_CORRUPT;
      }
    }
    fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
  }

  fts5BufferFree(&buf1);
  fts5BufferFree(&buf2);
}
6080 
/*
** Check the internal consistency of segment pSeg.
**
** This is a no-op if pSeg->pgnoFirst is 0. Otherwise, the rows of the
** "<name>_idx" table that belong to this segment are visited in
** (segid, term) order. The pgno column of each row is split into a leaf
** page number (pgno>>1) and a doclist-index flag (pgno&1). Rows that refer
** to a leaf before pSeg->pgnoFirst are ignored. For each remaining row:
**
**   * The leaf is loaded. It must have data following offset szLeaf, its
**     first-rowid offset must be less than its first-term offset, the
**     first-term offset must be less than szLeaf, and its first term must
**     not compare less than the term stored in the row. The page-index is
**     then checked by fts5IntegrityCheckPgidx().
**
**   * The leaves between the previous row's leaf and this one are checked
**     by fts5IndexIntegrityCheckEmpty().
**
**   * If the doclist-index flag is set, the doclist index is iterated.
**     Leaves skipped over by the doclist index must have a first-rowid
**     offset of zero, and each leaf named by it must begin with the rowid
**     that the doclist index reports for it.
**
** Failed checks set p->rc to FTS5_CORRUPT and end the loop. The SQL
** statement is finalized before returning. Its result code is copied to
** p->rc if p->rc is still SQLITE_OK at that point.
*/
fts5IndexIntegrityCheckSegment(Fts5Index * p,Fts5StructureSegment * pSeg)6081 static void fts5IndexIntegrityCheckSegment(
6082   Fts5Index *p,                   /* FTS5 backend object */
6083   Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
6084 ){
6085   Fts5Config *pConfig = p->pConfig;
6086   sqlite3_stmt *pStmt = 0;
6087   int rc2;
6088   int iIdxPrevLeaf = pSeg->pgnoFirst-1;   /* Leaf of the previous row visited */
6089   int iDlidxPrevLeaf = pSeg->pgnoLast;    /* Leaves after this must be rowid-less */
6090 
6091   if( pSeg->pgnoFirst==0 ) return;
6092 
6093   fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
6094       "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
6095       "ORDER BY 1, 2",
6096       pConfig->zDb, pConfig->zName, pSeg->iSegid
6097   ));
6098 
6099   /* Iterate through the b-tree hierarchy.  */
6100   while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
6101     i64 iRow;                     /* Rowid for this leaf */
6102     Fts5Data *pLeaf;              /* Data for this leaf */
6103 
6104     const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
6105     int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
6106     int iIdxLeaf = sqlite3_column_int(pStmt, 2);    /* (pgno>>1) */
6107     int bIdxDlidx = sqlite3_column_int(pStmt, 3);   /* (pgno&1) */
6108 
6109     /* If the leaf in question has already been trimmed from the segment,
6110     ** ignore this b-tree entry. Otherwise, load it into memory. */
6111     if( iIdxLeaf<pSeg->pgnoFirst ) continue;
6112     iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
6113     pLeaf = fts5LeafRead(p, iRow);
6114     if( pLeaf==0 ) break;
6115 
6116     /* Check that the leaf contains at least one term, and that it is equal
6117     ** to or larger than the split-key in zIdxTerm.  Also check that if there
6118     ** is also a rowid pointer within the leaf page header, it points to a
6119     ** location before the term.  */
6120     if( pLeaf->nn<=pLeaf->szLeaf ){
6121       p->rc = FTS5_CORRUPT;
6122     }else{
6123       int iOff;                   /* Offset of first term on leaf */
6124       int iRowidOff;              /* Offset of first rowid on leaf */
6125       int nTerm;                  /* Size of term on leaf in bytes */
6126       int res;                    /* Comparison of term and split-key */
6127 
6128       iOff = fts5LeafFirstTermOff(pLeaf);
6129       iRowidOff = fts5LeafFirstRowidOff(pLeaf);
6130       if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
6131         p->rc = FTS5_CORRUPT;
6132       }else{
6133         iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
              /* Compare the common prefix first; if equal, the longer one is larger */
6134         res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
6135         if( res==0 ) res = nTerm - nIdxTerm;
6136         if( res<0 ) p->rc = FTS5_CORRUPT;
6137       }
6138 
6139       fts5IntegrityCheckPgidx(p, pLeaf);
6140     }
6141     fts5DataRelease(pLeaf);
6142     if( p->rc ) break;
6143 
6144     /* Now check that the iter.nEmpty leaves following the current leaf
6145     ** (a) exist and (b) contain no terms. */
          /* The range checked is the leaves after the previous row's leaf and
          ** before this row's leaf. Those after iDlidxPrevLeaf must also be
          ** rowid-less. */
6146     fts5IndexIntegrityCheckEmpty(
6147         p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
6148     );
6149     if( p->rc ) break;
6150 
6151     /* If there is a doclist-index, check that it looks right. */
6152     if( bIdxDlidx ){
6153       Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
6154       int iPrevLeaf = iIdxLeaf;
6155       int iSegid = pSeg->iSegid;
6156       int iPg = 0;
6157       i64 iKey;
6158 
6159       for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
6160           fts5DlidxIterEof(p, pDlidx)==0;
6161           fts5DlidxIterNext(p, pDlidx)
6162       ){
6163 
6164         /* Check any rowid-less pages that occur before the current leaf. */
6165         for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
6166           iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
6167           pLeaf = fts5DataRead(p, iKey);
6168           if( pLeaf ){
6169             if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
6170             fts5DataRelease(pLeaf);
6171           }
6172         }
6173         iPrevLeaf = fts5DlidxIterPgno(pDlidx);
6174 
6175         /* Check that the leaf page indicated by the iterator really does
6176         ** contain the rowid suggested by the same. */
6177         iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
6178         pLeaf = fts5DataRead(p, iKey);
6179         if( pLeaf ){
6180           i64 iRowid;
6181           int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
6182           ASSERT_SZLEAF_OK(pLeaf);
6183           if( iRowidOff>=pLeaf->szLeaf ){
6184             p->rc = FTS5_CORRUPT;
6185           }else{
6186             fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
6187             if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
6188           }
6189           fts5DataRelease(pLeaf);
6190         }
6191       }
6192 
            /* iPg holds whatever the inner loop above left in it for the last
            ** doclist-index entry visited, or 0 if no entry was visited. */
6193       iDlidxPrevLeaf = iPg;
6194       fts5DlidxIterFree(pDlidx);
6195       fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
6196     }else{
6197       iDlidxPrevLeaf = pSeg->pgnoLast;
6198       /* TODO: Check there is no doclist index */
6199     }
6200 
6201     iIdxPrevLeaf = iIdxLeaf;
6202   }
6203 
6204   rc2 = sqlite3_finalize(pStmt);
6205   if( p->rc==SQLITE_OK ) p->rc = rc2;
6206 
6207   /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
        /* NOTE(review): the disabled block below refers to a variable "iter"
        ** that is not declared anywhere in this function. */
6208 #if 0
6209   if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
6210     p->rc = FTS5_CORRUPT;
6211   }
6212 #endif
6213 }
6214 
6215 
6216 /*
6217 ** Run internal checks to ensure that the FTS index (a) is internally
6218 ** consistent and (b) contains entries for which the XOR of the checksums
6219 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
6220 **
6221 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
6222 ** checksum does not match. Return SQLITE_OK if all checks pass without
6223 ** error, or some other SQLite error code if another error (e.g. OOM)
6224 ** occurs.
6225 */
sqlite3Fts5IndexIntegrityCheck(Fts5Index * p,u64 cksum,int bUseCksum)6226 int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
6227   int eDetail = p->pConfig->eDetail;
6228   u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
6229   Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
6230   Fts5Iter *pIter;                /* Used to iterate through entire index */
6231   Fts5Structure *pStruct;         /* Index structure */
6232   int iLvl, iSeg;
6233 
6234 #ifdef SQLITE_DEBUG
6235   /* Used by extra internal tests only run if NDEBUG is not defined */
6236   u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
6237   Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
6238 #endif
6239   const int flags = FTS5INDEX_QUERY_NOOUTPUT;
6240 
6241   /* Load the FTS index structure */
6242   pStruct = fts5StructureRead(p);
6243   if( pStruct==0 ){
6244     assert( p->rc!=SQLITE_OK );
6245     return fts5IndexReturn(p);
6246   }
6247 
6248   /* Check that the internal nodes of each segment match the leaves */
6249   for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
6250     for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
6251       Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
6252       fts5IndexIntegrityCheckSegment(p, pSeg);
6253     }
6254   }
6255 
6256   /* The cksum argument passed to this function is a checksum calculated
6257   ** based on all expected entries in the FTS index (including prefix index
6258   ** entries). This block checks that a checksum calculated based on the
6259   ** actual contents of FTS index is identical.
6260   **
6261   ** Two versions of the same checksum are calculated. The first (stack
6262   ** variable cksum2) based on entries extracted from the full-text index
6263   ** while doing a linear scan of each individual index in turn.
6264   **
6265   ** As each term visited by the linear scans, a separate query for the
6266   ** same term is performed. cksum3 is calculated based on the entries
6267   ** extracted by these queries.
6268   */
6269   for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
6270       fts5MultiIterEof(p, pIter)==0;
6271       fts5MultiIterNext(p, pIter, 0, 0)
6272   ){
6273     int n;                      /* Size of term in bytes */
6274     i64 iPos = 0;               /* Position read from poslist */
6275     int iOff = 0;               /* Offset within poslist */
6276     i64 iRowid = fts5MultiIterRowid(pIter);
6277     char *z = (char*)fts5MultiIterTerm(pIter, &n);
6278 
6279     /* If this is a new term, query for it. Update cksum3 with the results. */
6280     fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
6281     if( p->rc ) break;
6282 
6283     if( eDetail==FTS5_DETAIL_NONE ){
6284       if( 0==fts5MultiIterIsEmpty(p, pIter) ){
6285         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
6286       }
6287     }else{
6288       poslist.n = 0;
6289       fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
6290       fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
6291       while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
6292         int iCol = FTS5_POS2COLUMN(iPos);
6293         int iTokOff = FTS5_POS2OFFSET(iPos);
6294         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
6295       }
6296     }
6297   }
6298   fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
6299 
6300   fts5MultiIterFree(pIter);
6301   if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
6302 
6303   fts5StructureRelease(pStruct);
6304 #ifdef SQLITE_DEBUG
6305   fts5BufferFree(&term);
6306 #endif
6307   fts5BufferFree(&poslist);
6308   return fts5IndexReturn(p);
6309 }
6310 
6311 /*************************************************************************
6312 **************************************************************************
6313 ** Below this point is the implementation of the fts5_decode() scalar
6314 ** function only.
6315 */
6316 
6317 #ifdef SQLITE_TEST
6318 /*
6319 ** Decode a segment-data rowid from the %_data table. This function is
6320 ** the opposite of macro FTS5_SEGMENT_ROWID().
6321 */
fts5DecodeRowid(i64 iRowid,int * piSegid,int * pbDlidx,int * piHeight,int * piPgno)6322 static void fts5DecodeRowid(
6323   i64 iRowid,                     /* Rowid from %_data table */
6324   int *piSegid,                   /* OUT: Segment id */
6325   int *pbDlidx,                   /* OUT: Dlidx flag */
6326   int *piHeight,                  /* OUT: Height */
6327   int *piPgno                     /* OUT: Page number */
6328 ){
6329   *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
6330   iRowid >>= FTS5_DATA_PAGE_B;
6331 
6332   *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
6333   iRowid >>= FTS5_DATA_HEIGHT_B;
6334 
6335   *pbDlidx = (int)(iRowid & 0x0001);
6336   iRowid >>= FTS5_DATA_DLI_B;
6337 
6338   *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
6339 }
6340 #endif /* SQLITE_TEST */
6341 
6342 #ifdef SQLITE_TEST
/*
** Append a short text description of %_data rowid iKey to buffer pBuf.
**
** If the segment-id decoded from the rowid is zero, the text is
** "{averages} " when iKey is FTS5_AVERAGES_ROWID and "{structure}"
** otherwise. For any other segment-id the text lists the segment id,
** height and page number, prefixed by "dlidx " if the dlidx flag is set.
*/
static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
  int iSegid, iHeight, iPgno, bDlidx;       /* Rowid components */
  fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);

  if( iSegid!=0 ){
    const char *zDlidx = (bDlidx ? "dlidx " : "");
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
        zDlidx, iSegid, iHeight, iPgno
    );
    return;
  }

  if( iKey==FTS5_AVERAGES_ROWID ){
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
  }else{
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
  }
}
6360 #endif /* SQLITE_TEST */
6361 
6362 #ifdef SQLITE_TEST
/*
** Append a text description of structure object p to buffer pBuf. One
** brace-delimited group is written per level, holding the level number,
** its nMerge and nSeg values, followed by one nested group per segment
** giving the segment id and its first and last leaf page numbers.
*/
static void fts5DebugStructure(
  int *pRc,                       /* IN/OUT: error code */
  Fts5Buffer *pBuf,
  Fts5Structure *p
){
  int iLvl = 0;                   /* Current level */

  while( iLvl<p->nLevel ){
    Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
    int iSeg;                     /* Current segment within pLvl */

    sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
        " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
    );
    for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
      Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
      sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
          pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
      );
    }
    sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");

    iLvl++;
  }
}
6384 #endif /* SQLITE_TEST */
6385 
6386 #ifdef SQLITE_TEST
6387 /*
6388 ** This is part of the fts5_decode() debugging aid.
6389 **
6390 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
6391 ** function appends a human-readable representation of the same object
6392 ** to the buffer passed as the second argument.
6393 */
fts5DecodeStructure(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6394 static void fts5DecodeStructure(
6395   int *pRc,                       /* IN/OUT: error code */
6396   Fts5Buffer *pBuf,
6397   const u8 *pBlob, int nBlob
6398 ){
6399   int rc;                         /* Return code */
6400   Fts5Structure *p = 0;           /* Decoded structure object */
6401 
6402   rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
6403   if( rc!=SQLITE_OK ){
6404     *pRc = rc;
6405     return;
6406   }
6407 
6408   fts5DebugStructure(pRc, pBuf, p);
6409   fts5StructureRelease(p);
6410 }
6411 #endif /* SQLITE_TEST */
6412 
6413 #ifdef SQLITE_TEST
6414 /*
6415 ** This is part of the fts5_decode() debugging aid.
6416 **
6417 ** Arguments pBlob/nBlob contain an "averages" record. This function
6418 ** appends a human-readable representation of record to the buffer passed
6419 ** as the second argument.
6420 */
fts5DecodeAverages(int * pRc,Fts5Buffer * pBuf,const u8 * pBlob,int nBlob)6421 static void fts5DecodeAverages(
6422   int *pRc,                       /* IN/OUT: error code */
6423   Fts5Buffer *pBuf,
6424   const u8 *pBlob, int nBlob
6425 ){
6426   int i = 0;
6427   const char *zSpace = "";
6428 
6429   while( i<nBlob ){
6430     u64 iVal;
6431     i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
6432     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
6433     zSpace = " ";
6434   }
6435 }
6436 #endif /* SQLITE_TEST */
6437 
6438 #ifdef SQLITE_TEST
6439 /*
6440 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
6441 ** each varint and append its string representation to buffer pBuf. Return
6442 ** after either the input buffer is exhausted or a 0 value is read.
6443 **
6444 ** The return value is the number of bytes read from the input buffer.
6445 */
fts5DecodePoslist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6446 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6447   int iOff = 0;
6448   while( iOff<n ){
6449     int iVal;
6450     iOff += fts5GetVarint32(&a[iOff], iVal);
6451     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
6452   }
6453   return iOff;
6454 }
6455 #endif /* SQLITE_TEST */
6456 
6457 #ifdef SQLITE_TEST
6458 /*
6459 ** The start of buffer (a/n) contains the start of a doclist. The doclist
6460 ** may or may not finish within the buffer. This function appends a text
6461 ** representation of the part of the doclist that is present to buffer
6462 ** pBuf.
6463 **
6464 ** The return value is the number of bytes read from the input buffer.
6465 */
fts5DecodeDoclist(int * pRc,Fts5Buffer * pBuf,const u8 * a,int n)6466 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
6467   i64 iDocid = 0;
6468   int iOff = 0;
6469 
6470   if( n>0 ){
6471     iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
6472     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6473   }
6474   while( iOff<n ){
6475     int nPos;
6476     int bDel;
6477     iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
6478     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
6479     iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
6480     if( iOff<n ){
6481       i64 iDelta;
6482       iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
6483       iDocid += iDelta;
6484       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
6485     }
6486   }
6487 
6488   return iOff;
6489 }
6490 #endif /* SQLITE_TEST */
6491 
6492 #ifdef SQLITE_TEST
6493 /*
6494 ** This function is part of the fts5_decode() debugging function. It is
6495 ** only ever used with detail=none tables.
6496 **
6497 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
6498 ** tables. This function appends a human-readable version of that list to
6499 ** buffer pBuf.
6500 **
6501 ** If *pRc is other than SQLITE_OK when this function is called, it is a
6502 ** no-op. If an OOM or other error occurs within this function, *pRc is
6503 ** set to an SQLite error code before returning. The final state of buffer
6504 ** pBuf is undefined in this case.
6505 */
fts5DecodeRowidList(int * pRc,Fts5Buffer * pBuf,const u8 * pData,int nData)6506 static void fts5DecodeRowidList(
6507   int *pRc,                       /* IN/OUT: Error code */
6508   Fts5Buffer *pBuf,               /* Buffer to append text to */
6509   const u8 *pData, int nData      /* Data to decode list-of-rowids from */
6510 ){
6511   int i = 0;
6512   i64 iRowid = 0;
6513 
6514   while( i<nData ){
6515     const char *zApp = "";
6516     u64 iVal;
6517     i += sqlite3Fts5GetVarint(&pData[i], &iVal);
6518     iRowid += iVal;
6519 
6520     if( i<nData && pData[i]==0x00 ){
6521       i++;
6522       if( i<nData && pData[i]==0x00 ){
6523         i++;
6524         zApp = "+";
6525       }else{
6526         zApp = "*";
6527       }
6528     }
6529 
6530     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
6531   }
6532 }
6533 #endif /* SQLITE_TEST */
6534 
6535 #ifdef SQLITE_TEST
6536 /*
6537 ** The implementation of user-defined scalar function fts5_decode().
6538 */
/*
** Arguments: apVal[0] is read as the 64-bit rowid of a %_data record and
** apVal[1] as the record blob. The same implementation serves both
** fts5_decode() and fts5_decode_none(); a non-NULL user-data pointer
** (see the registrations in sqlite3Fts5IndexInit()) selects the
** detail=none leaf format.
**
** The blob is copied into a zero-padded buffer and decoded according to
** the fields of the rowid:
**
**   * dlidx flag set:  the record is walked as a doclist-index level and
**                      each "pgno(rowid)" pair is printed,
**   * segment id 0:    the record is printed as an averages record if the
**                      rowid is FTS5_AVERAGES_ROWID, or as a structure
**                      record otherwise,
**   * otherwise:       the record is printed as a leaf page, in either the
**                      detail=none or the regular format.
**
** On success the accumulated text becomes the function result. If rc is
** not SQLITE_OK at the end, the result is that error code instead. A
** regular-format record shorter than 4 bytes yields the text "corrupt"
** with no error code.
**
** NOTE(review): the detail=none branch takes szLeaf from bytes 2..3 of the
** record and then uses it, and offsets derived from the page-index, without
** the checks against n that the regular branch performs. Confirm whether a
** corrupt record can make it index past the padded copy.
*/
fts5DecodeFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6539 static void fts5DecodeFunction(
6540   sqlite3_context *pCtx,          /* Function call context */
6541   int nArg,                       /* Number of args (always 2) */
6542   sqlite3_value **apVal           /* Function arguments */
6543 ){
6544   i64 iRowid;                     /* Rowid for record being decoded */
6545   int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
6546   const u8 *aBlob; int n;         /* Record to decode */
6547   u8 *a = 0;                      /* Zero-padded copy of aBlob */
6548   Fts5Buffer s;                   /* Build up text to return here */
6549   int rc = SQLITE_OK;             /* Return code */
6550   sqlite3_int64 nSpace = 0;       /* Bytes to allocate for a[] */
6551   int eDetailNone = (sqlite3_user_data(pCtx)!=0);  /* True for detail=none */
6552 
6553   assert( nArg==2 );
6554   UNUSED_PARAM(nArg);
6555   memset(&s, 0, sizeof(Fts5Buffer));
6556   iRowid = sqlite3_value_int64(apVal[0]);
6557 
6558   /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
6559   ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
6560   ** buffer overreads even if the record is corrupt.  */
6561   n = sqlite3_value_bytes(apVal[1]);
6562   aBlob = sqlite3_value_blob(apVal[1]);
6563   nSpace = n + FTS5_DATA_ZERO_PADDING;
6564   a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
6565   if( a==0 ) goto decode_out;
6566   if( n>0 ) memcpy(a, aBlob, n);
6567 
6568   fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
6569 
        /* Every result starts with a description of the rowid itself. */
6570   fts5DebugRowid(&rc, &s, iRowid);
6571   if( bDlidx ){
          /* Doclist-index record: wrap the copy in a stack Fts5Data so that
          ** fts5DlidxLvlNext() can walk it. */
6572     Fts5Data dlidx;
6573     Fts5DlidxLvl lvl;
6574 
6575     dlidx.p = a;
6576     dlidx.nn = n;
6577 
6578     memset(&lvl, 0, sizeof(Fts5DlidxLvl));
6579     lvl.pData = &dlidx;
6580     lvl.iLeafPgno = iPgno;
6581 
6582     for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
6583       sqlite3Fts5BufferAppendPrintf(&rc, &s,
6584           " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
6585       );
6586     }
6587   }else if( iSegid==0 ){
6588     if( iRowid==FTS5_AVERAGES_ROWID ){
6589       fts5DecodeAverages(&rc, &s, a, n);
6590     }else{
6591       fts5DecodeStructure(&rc, &s, a, n);
6592     }
6593   }else if( eDetailNone ){
          /* Leaf page of a detail=none table. */
6594     Fts5Buffer term;              /* Current term read from page */
6595     int szLeaf;
6596     int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6597     int iTermOff;
6598     int nKeep = 0;
6599     int iOff;
6600 
6601     memset(&term, 0, sizeof(Fts5Buffer));
6602 
6603     /* Decode any entries that occur before the first term. */
6604     if( szLeaf<n ){
6605       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
6606     }else{
6607       iTermOff = szLeaf;
6608     }
6609     fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
6610 
6611     iOff = iTermOff;
6612     while( iOff<szLeaf ){
6613       int nAppend;
6614 
6615       /* Read the term data for the next term*/
6616       iOff += fts5GetVarint32(&a[iOff], nAppend);
6617       term.n = nKeep;
6618       fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
6619       sqlite3Fts5BufferAppendPrintf(
6620           &rc, &s, " term=%.*s", term.n, (const char*)term.p
6621       );
6622       iOff += nAppend;
6623 
6624       /* Figure out where the doclist for this term ends */
6625       if( iPgidxOff<n ){
6626         int nIncr;
6627         iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
6628         iTermOff += nIncr;
6629       }else{
6630         iTermOff = szLeaf;
6631       }
6632 
6633       fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
6634       iOff = iTermOff;
            /* Unless this was the last term, read the number of bytes the
            ** next term keeps from this one. */
6635       if( iOff<szLeaf ){
6636         iOff += fts5GetVarint32(&a[iOff], nKeep);
6637       }
6638     }
6639 
6640     fts5BufferFree(&term);
6641   }else{
          /* Leaf page in the regular (non detail=none) format. */
6642     Fts5Buffer term;              /* Current term read from page */
6643     int szLeaf;                   /* Offset of pgidx in a[] */
6644     int iPgidxOff;
6645     int iPgidxPrev = 0;           /* Previous value read from pgidx */
6646     int iTermOff = 0;
6647     int iRowidOff = 0;
6648     int iOff;
6649     int nDoclist;
6650 
6651     memset(&term, 0, sizeof(Fts5Buffer));
6652 
6653     if( n<4 ){
            /* Too short to hold the 4-byte header: the result is the text
            ** "corrupt", and rc is left unchanged. */
6654       sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
6655       goto decode_out;
6656     }else{
6657       iRowidOff = fts5GetU16(&a[0]);
6658       iPgidxOff = szLeaf = fts5GetU16(&a[2]);
6659       if( iPgidxOff<n ){
6660         fts5GetVarint32(&a[iPgidxOff], iTermOff);
6661       }else if( iPgidxOff>n ){
6662         rc = FTS5_CORRUPT;
6663         goto decode_out;
6664       }
6665     }
6666 
6667     /* Decode the position list tail at the start of the page */
6668     if( iRowidOff!=0 ){
6669       iOff = iRowidOff;
6670     }else if( iTermOff!=0 ){
6671       iOff = iTermOff;
6672     }else{
6673       iOff = szLeaf;
6674     }
6675     if( iOff>n ){
6676       rc = FTS5_CORRUPT;
6677       goto decode_out;
6678     }
6679     fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
6680 
6681     /* Decode any more doclist data that appears on the page before the
6682     ** first term. */
6683     nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
6684     if( nDoclist+iOff>n ){
6685       rc = FTS5_CORRUPT;
6686       goto decode_out;
6687     }
6688     fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
6689 
          /* One iteration per page-index entry, i.e. per term on the page. */
6690     while( iPgidxOff<n && rc==SQLITE_OK ){
6691       int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
6692       int nByte;                            /* Bytes of data */
6693       int iEnd;
6694 
6695       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
6696       iPgidxPrev += nByte;
6697       iOff = iPgidxPrev;
6698 
            /* This term's data ends where the next term starts, or at szLeaf
            ** if this is the last page-index entry. */
6699       if( iPgidxOff<n ){
6700         fts5GetVarint32(&a[iPgidxOff], nByte);
6701         iEnd = iPgidxPrev + nByte;
6702       }else{
6703         iEnd = szLeaf;
6704       }
6705       if( iEnd>szLeaf ){
6706         rc = FTS5_CORRUPT;
6707         break;
6708       }
6709 
            /* Terms after the first begin with the number of bytes to keep
            ** from the previous term. */
6710       if( bFirst==0 ){
6711         iOff += fts5GetVarint32(&a[iOff], nByte);
6712         if( nByte>term.n ){
6713           rc = FTS5_CORRUPT;
6714           break;
6715         }
6716         term.n = nByte;
6717       }
6718       iOff += fts5GetVarint32(&a[iOff], nByte);
6719       if( iOff+nByte>n ){
6720         rc = FTS5_CORRUPT;
6721         break;
6722       }
6723       fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
6724       iOff += nByte;
6725 
6726       sqlite3Fts5BufferAppendPrintf(
6727           &rc, &s, " term=%.*s", term.n, (const char*)term.p
6728       );
6729       iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
6730     }
6731 
6732     fts5BufferFree(&term);
6733   }
6734 
6735  decode_out:
6736   sqlite3_free(a);
6737   if( rc==SQLITE_OK ){
6738     sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
6739   }else{
6740     sqlite3_result_error_code(pCtx, rc);
6741   }
6742   fts5BufferFree(&s);
6743 }
6744 #endif /* SQLITE_TEST */
6745 
6746 #ifdef SQLITE_TEST
6747 /*
6748 ** The implementation of user-defined scalar function fts5_rowid().
6749 */
fts5RowidFunction(sqlite3_context * pCtx,int nArg,sqlite3_value ** apVal)6750 static void fts5RowidFunction(
6751   sqlite3_context *pCtx,          /* Function call context */
6752   int nArg,                       /* Number of args (always 2) */
6753   sqlite3_value **apVal           /* Function arguments */
6754 ){
6755   const char *zArg;
6756   if( nArg==0 ){
6757     sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
6758   }else{
6759     zArg = (const char*)sqlite3_value_text(apVal[0]);
6760     if( 0==sqlite3_stricmp(zArg, "segment") ){
6761       i64 iRowid;
6762       int segid, pgno;
6763       if( nArg!=3 ){
6764         sqlite3_result_error(pCtx,
6765             "should be: fts5_rowid('segment', segid, pgno))", -1
6766         );
6767       }else{
6768         segid = sqlite3_value_int(apVal[1]);
6769         pgno = sqlite3_value_int(apVal[2]);
6770         iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
6771         sqlite3_result_int64(pCtx, iRowid);
6772       }
6773     }else{
6774       sqlite3_result_error(pCtx,
6775         "first arg to fts5_rowid() must be 'segment'" , -1
6776       );
6777     }
6778   }
6779 }
6780 #endif /* SQLITE_TEST */
6781 
6782 /*
6783 ** This is called as part of registering the FTS5 module with database
6784 ** connection db. It registers several user-defined scalar functions useful
6785 ** with FTS5.
6786 **
6787 ** If successful, SQLITE_OK is returned. If an error occurs, some other
6788 ** SQLite error code is returned instead.
6789 */
sqlite3Fts5IndexInit(sqlite3 * db)6790 int sqlite3Fts5IndexInit(sqlite3 *db){
6791 #ifdef SQLITE_TEST
6792   int rc = sqlite3_create_function(
6793       db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
6794   );
6795 
6796   if( rc==SQLITE_OK ){
6797     rc = sqlite3_create_function(
6798         db, "fts5_decode_none", 2,
6799         SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
6800     );
6801   }
6802 
6803   if( rc==SQLITE_OK ){
6804     rc = sqlite3_create_function(
6805         db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
6806     );
6807   }
6808   return rc;
6809 #else
6810   return SQLITE_OK;
6811   UNUSED_PARAM(db);
6812 #endif
6813 }
6814 
6815 
/*
** If the value returned by fts5IndexDataVersion() differs from the
** version recorded in p->iStructVersion, call fts5StructureInvalidate()
** on the index. The return value is that of fts5IndexReturn(p).
*/
int sqlite3Fts5IndexReset(Fts5Index *p){
  int bStale;                     /* True if the recorded version is stale */

  assert( p->pStruct==0 || p->iStructVersion!=0 );
  bStale = (fts5IndexDataVersion(p)!=p->iStructVersion);
  if( bStale ){
    fts5StructureInvalidate(p);
  }
  return fts5IndexReturn(p);
}
6823