1 
2 
3 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5)
4 
5 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
6 # define NDEBUG 1   /* No SQLITE_DEBUG: make sure NDEBUG is defined */
7 #endif
8 #if defined(NDEBUG) && defined(SQLITE_DEBUG)
9 # undef NDEBUG      /* SQLITE_DEBUG overrides any previously defined NDEBUG */
10 #endif
11 
12 #line 1 "fts5.h"
13 /*
14 ** 2014 May 31
15 **
16 ** The author disclaims copyright to this source code.  In place of
17 ** a legal notice, here is a blessing:
18 **
19 **    May you do good and not evil.
20 **    May you find forgiveness for yourself and forgive others.
21 **    May you share freely, never taking more than you give.
22 **
23 ******************************************************************************
24 **
25 ** Interfaces to extend FTS5. Using the interfaces defined in this file,
26 ** FTS5 may be extended with:
27 **
28 **     * custom tokenizers, and
29 **     * custom auxiliary functions.
30 */
31 
32 
33 #ifndef _FTS5_H
34 #define _FTS5_H
35 
36 #include "sqlite3.h"
37 
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41 
42 /*************************************************************************
43 ** CUSTOM AUXILIARY FUNCTIONS
44 **
45 ** Virtual table implementations may overload SQL functions by implementing
46 ** the sqlite3_module.xFindFunction() method.
47 */
48 
49 typedef struct Fts5ExtensionApi Fts5ExtensionApi;   /* Method table handed to auxiliary functions */
50 typedef struct Fts5Context Fts5Context;             /* Handle passed as first arg to every API method */
51 typedef struct Fts5PhraseIter Fts5PhraseIter;       /* State for the xPhraseFirst()/xPhraseNext() family */
52 
53 typedef void (*fts5_extension_function)(            /* Signature of an FTS5 auxiliary function */
54   const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
55   Fts5Context *pFts,              /* First arg to pass to pApi functions */
56   sqlite3_context *pCtx,          /* Context for returning result/error */
57   int nVal,                       /* Number of values in apVal[] array */
58   sqlite3_value **apVal           /* Array of nVal trailing arguments */
59 );
60 
61 struct Fts5PhraseIter {           /* Iteration state used by the xPhraseFirst()/xPhraseNext() family */
62   const unsigned char *a;         /* Managed by the xPhrase*() methods; applications must not modify */
63   const unsigned char *b;         /* Managed by the xPhrase*() methods; applications must not modify */
64 };
65 
66 /*
67 ** EXTENSION API FUNCTIONS
68 **
69 ** xUserData(pFts):
70 **   Return a copy of the context pointer the extension function was
71 **   registered with.
72 **
73 ** xColumnTotalSize(pFts, iCol, pnToken):
74 **   If parameter iCol is less than zero, set output variable *pnToken
75 **   to the total number of tokens in the FTS5 table. Or, if iCol is
76 **   non-negative but less than the number of columns in the table, return
77 **   the total number of tokens in column iCol, considering all rows in
78 **   the FTS5 table.
79 **
80 **   If parameter iCol is greater than or equal to the number of columns
81 **   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
82 **   an OOM condition or IO error), an appropriate SQLite error code is
83 **   returned.
84 **
85 ** xColumnCount(pFts):
86 **   Return the number of columns in the table.
87 **
88 ** xColumnSize(pFts, iCol, pnToken):
89 **   If parameter iCol is less than zero, set output variable *pnToken
90 **   to the total number of tokens in the current row. Or, if iCol is
91 **   non-negative but less than the number of columns in the table, set
92 **   *pnToken to the number of tokens in column iCol of the current row.
93 **
94 **   If parameter iCol is greater than or equal to the number of columns
95 **   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
96 **   an OOM condition or IO error), an appropriate SQLite error code is
97 **   returned.
98 **
99 **   This function may be quite inefficient if used with an FTS5 table
100 **   created with the "columnsize=0" option.
101 **
102 ** xColumnText:
103 **   This function attempts to retrieve the text of column iCol of the
104 **   current document. If successful, (*pz) is set to point to a buffer
105 **   containing the text in utf-8 encoding, (*pn) is set to the size in bytes
106 **   (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
107 **   if an error occurs, an SQLite error code is returned and the final values
108 **   of (*pz) and (*pn) are undefined.
109 **
110 ** xPhraseCount:
111 **   Returns the number of phrases in the current query expression.
112 **
113 ** xPhraseSize:
114 **   Returns the number of tokens in phrase iPhrase of the query. Phrases
115 **   are numbered starting from zero.
116 **
117 ** xInstCount:
118 **   Set *pnInst to the total number of occurrences of all phrases within
119 **   the query within the current row. Return SQLITE_OK if successful, or
120 **   an error code (e.g. SQLITE_NOMEM) if an error occurs.
121 **
122 **   This API can be quite slow if used with an FTS5 table created with the
123 **   "detail=none" or "detail=column" option. If the FTS5 table is created
124 **   with either "detail=none" or "detail=column" and "content=" option
125 **   (i.e. if it is a contentless table), then this API always returns 0.
126 **
127 ** xInst:
128 **   Query for the details of phrase match iIdx within the current row.
129 **   Phrase matches are numbered starting from zero, so the iIdx argument
130 **   should be greater than or equal to zero and smaller than the value
131 **   output by xInstCount().
132 **
133 **   Usually, output parameter *piPhrase is set to the phrase number, *piCol
134 **   to the column in which it occurs and *piOff the token offset of the
135 **   first token of the phrase. Returns SQLITE_OK if successful, or an error
136 **   code (e.g. SQLITE_NOMEM) if an error occurs.
137 **
138 **   This API can be quite slow if used with an FTS5 table created with the
139 **   "detail=none" or "detail=column" option.
140 **
141 ** xRowid:
142 **   Returns the rowid of the current row.
143 **
144 ** xTokenize:
145 **   Tokenize text using the tokenizer belonging to the FTS5 table.
146 **
147 ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
148 **   This API function is used to query the FTS table for phrase iPhrase
149 **   of the current query. Specifically, a query equivalent to:
150 **
151 **       ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
152 **
153 **   with $p set to a phrase equivalent to the phrase iPhrase of the
154 **   current query is executed. Any column filter that applies to
155 **   phrase iPhrase of the current query is included in $p. For each
156 **   row visited, the callback function passed as the fourth argument
157 **   is invoked. The context and API objects passed to the callback
158 **   function may be used to access the properties of each matched row.
159 **   Invoking Api.xUserData() returns a copy of the pointer passed as
160 **   the third argument to pUserData.
161 **
162 **   If the callback function returns any value other than SQLITE_OK, the
163 **   query is abandoned and the xQueryPhrase function returns immediately.
164 **   If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
165 **   Otherwise, the error code is propagated upwards.
166 **
167 **   If the query runs to completion without incident, SQLITE_OK is returned.
168 **   Or, if some error occurs before the query completes or is aborted by
169 **   the callback, an SQLite error code is returned.
170 **
171 **
172 ** xSetAuxdata(pFts5, pAux, xDelete)
173 **
174 **   Save the pointer passed as the second argument as the extension function's
175 **   "auxiliary data". The pointer may then be retrieved by the current or any
176 **   future invocation of the same fts5 extension function made as part of
177 **   the same MATCH query using the xGetAuxdata() API.
178 **
179 **   Each extension function is allocated a single auxiliary data slot for
180 **   each FTS query (MATCH expression). If the extension function is invoked
181 **   more than once for a single FTS query, then all invocations share a
182 **   single auxiliary data context.
183 **
184 **   If there is already an auxiliary data pointer when this function is
185 **   invoked, then it is replaced by the new pointer. If an xDelete callback
186 **   was specified along with the original pointer, it is invoked at this
187 **   point.
188 **
189 **   The xDelete callback, if one is specified, is also invoked on the
190 **   auxiliary data pointer after the FTS5 query has finished.
191 **
192 **   If an error (e.g. an OOM condition) occurs within this function,
193 **   the auxiliary data is set to NULL and an error code returned. If the
194 **   xDelete parameter was not NULL, it is invoked on the auxiliary data
195 **   pointer before returning.
196 **
197 **
198 ** xGetAuxdata(pFts5, bClear)
199 **
200 **   Returns the current auxiliary data pointer for the fts5 extension
201 **   function. See the xSetAuxdata() method for details.
202 **
203 **   If the bClear argument is non-zero, then the auxiliary data is cleared
204 **   (set to NULL) before this function returns. In this case the xDelete,
205 **   if any, is not invoked.
206 **
207 **
208 ** xRowCount(pFts5, pnRow)
209 **
210 **   This function is used to retrieve the total number of rows in the table.
211 **   In other words, the same value that would be returned by:
212 **
213 **        SELECT count(*) FROM ftstable;
214 **
215 ** xPhraseFirst()
216 **   This function is used, along with type Fts5PhraseIter and the xPhraseNext
217 **   method, to iterate through all instances of a single query phrase within
218 **   the current row. This is the same information as is accessible via the
219 **   xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
220 **   to use, this API may be faster under some circumstances. To iterate
221 **   through instances of phrase iPhrase, use the following code:
222 **
223 **       Fts5PhraseIter iter;
224 **       int iCol, iOff;
225 **       for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
226 **           iCol>=0;
227 **           pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
228 **       ){
229 **         // An instance of phrase iPhrase at offset iOff of column iCol
230 **       }
231 **
232 **   The Fts5PhraseIter structure is defined above. Applications should not
233 **   modify this structure directly - it should only be used as shown above
234 **   with the xPhraseFirst() and xPhraseNext() API methods (and by
235 **   xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below).
236 **
237 **   This API can be quite slow if used with an FTS5 table created with the
238 **   "detail=none" or "detail=column" option. If the FTS5 table is created
239 **   with either "detail=none" or "detail=column" and "content=" option
240 **   (i.e. if it is a contentless table), then this API always iterates
241 **   through an empty set (all calls to xPhraseFirst() set iCol to -1).
242 **
243 ** xPhraseNext()
244 **   See xPhraseFirst above.
245 **
246 ** xPhraseFirstColumn()
247 **   This function and xPhraseNextColumn() are similar to the xPhraseFirst()
248 **   and xPhraseNext() APIs described above. The difference is that instead
249 **   of iterating through all instances of a phrase in the current row, these
250 **   APIs are used to iterate through the set of columns in the current row
251 **   that contain one or more instances of a specified phrase. For example:
252 **
253 **       Fts5PhraseIter iter;
254 **       int iCol;
255 **       for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
256 **           iCol>=0;
257 **           pApi->xPhraseNextColumn(pFts, &iter, &iCol)
258 **       ){
259 **         // Column iCol contains at least one instance of phrase iPhrase
260 **       }
261 **
262 **   This API can be quite slow if used with an FTS5 table created with the
263 **   "detail=none" option. If the FTS5 table is created with both the
264 **   "detail=none" and "content=" options (i.e. if it is a contentless table),
265 **   then this API always iterates through an empty set (all calls to
266 **   xPhraseFirstColumn() set iCol to -1).
267 **
268 **   The information accessed using this API and its companion
269 **   xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
270 **   (or xInst/xInstCount). The chief advantage of this API is that it is
271 **   significantly more efficient than those alternatives when used with
272 **   "detail=column" tables.
273 **
274 ** xPhraseNextColumn()
275 **   See xPhraseFirstColumn above.
276 */
277 struct Fts5ExtensionApi {
278   int iVersion;                   /* Currently always set to 3 */
279 
280   void *(*xUserData)(Fts5Context*);     /* Pointer the function was registered with */
281 
282   int (*xColumnCount)(Fts5Context*);    /* Number of columns in the table */
283   int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);    /* OUT: rows in table */
284   int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
285 
286   int (*xTokenize)(Fts5Context*,
287     const char *pText, int nText, /* Text to tokenize */
288     void *pCtx,                   /* Context passed to xToken() */
289     int (*xToken)(void*, int, const char*, int, int, int)       /* Callback */
290   );
291 
292   int (*xPhraseCount)(Fts5Context*);              /* Phrases in current query */
293   int (*xPhraseSize)(Fts5Context*, int iPhrase);  /* Tokens in phrase iPhrase */
294 
295   int (*xInstCount)(Fts5Context*, int *pnInst);   /* OUT: phrase matches in row */
296   int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
297 
298   sqlite3_int64 (*xRowid)(Fts5Context*);          /* Rowid of the current row */
299   int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
300   int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
301 
302   int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
303     int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)    /* xCallback, invoked per row */
304   );
305   int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
306   void *(*xGetAuxdata)(Fts5Context*, int bClear);
307   /* The two unnamed int* outputs of xPhraseFirst() are piCol and piOff */
308   int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
309   void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
310   /* The unnamed int* output of xPhraseFirstColumn() is piCol */
311   int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*);
312   void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol);
313 };
314 
315 /*
316 ** CUSTOM AUXILIARY FUNCTIONS
317 *************************************************************************/
318 
319 /*************************************************************************
320 ** CUSTOM TOKENIZERS
321 **
322 ** Applications may also register custom tokenizer types. A tokenizer
323 ** is registered by providing fts5 with a populated instance of the
324 ** following structure. All structure methods must be defined, setting
325 ** any member of the fts5_tokenizer struct to NULL leads to undefined
326 ** behaviour. The structure methods are expected to function as follows:
327 **
328 ** xCreate:
329 **   This function is used to allocate and initialize a tokenizer instance.
330 **   A tokenizer instance is required to actually tokenize text.
331 **
332 **   The first argument passed to this function is a copy of the (void*)
333 **   pointer provided by the application when the fts5_tokenizer object
334 **   was registered with FTS5 (the third argument to xCreateTokenizer()).
335 **   The second and third arguments are an array of nul-terminated strings
336 **   containing the tokenizer arguments, if any, specified following the
337 **   tokenizer name as part of the CREATE VIRTUAL TABLE statement used
338 **   to create the FTS5 table.
339 **
340 **   The final argument is an output variable. If successful, (*ppOut)
341 **   should be set to point to the new tokenizer handle and SQLITE_OK
342 **   returned. If an error occurs, some value other than SQLITE_OK should
343 **   be returned. In this case, fts5 assumes that the final value of *ppOut
344 **   is undefined.
345 **
346 ** xDelete:
347 **   This function is invoked to delete a tokenizer handle previously
348 **   allocated using xCreate(). Fts5 guarantees that this function will
349 **   be invoked exactly once for each successful call to xCreate().
350 **
351 ** xTokenize:
352 **   This function is expected to tokenize the nText byte string indicated
353 **   by argument pText. pText may or may not be nul-terminated. The first
354 **   argument passed to this function is a pointer to an Fts5Tokenizer object
355 **   returned by an earlier call to xCreate().
356 **
357 **   The third argument indicates the reason that FTS5 is requesting
358 **   tokenization of the supplied text. This is always one of the following
359 **   four values:
360 **
361 **   <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
362 **            or removed from the FTS table. The tokenizer is being invoked to
363 **            determine the set of tokens to add to (or delete from) the
364 **            FTS index.
365 **
366 **       <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
367 **            against the FTS index. The tokenizer is being called to tokenize
368 **            a bareword or quoted string specified as part of the query.
369 **
370 **       <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
371 **            FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
372 **            followed by a "*" character, indicating that the last token
373 **            returned by the tokenizer will be treated as a token prefix.
374 **
375 **       <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
376 **            satisfy an Fts5ExtensionApi.xTokenize() request made by an
377 **            auxiliary function. Or an Fts5ExtensionApi.xColumnSize() request
378 **            made by the same on a columnsize=0 database.
379 **   </ul>
380 **
381 **   For each token in the input string, the supplied callback xToken() must
382 **   be invoked. The first argument to it should be a copy of the pointer
383 **   passed as the second argument to xTokenize(). The third and fourth
384 **   arguments are a pointer to a buffer containing the token text, and the
385 **   size of the token in bytes. The 5th and 6th arguments are the byte offsets
386 **   of the first byte of and first byte immediately following the text from
387 **   which the token is derived within the input.
388 **
389 **   The second argument passed to the xToken() callback ("tflags") should
390 **   normally be set to 0. The exception is if the tokenizer supports
391 **   synonyms. In this case see the discussion below for details.
392 **
393 **   FTS5 assumes the xToken() callback is invoked for each token in the
394 **   order that they occur within the input text.
395 **
396 **   If an xToken() callback returns any value other than SQLITE_OK, then
397 **   the tokenization should be abandoned and the xTokenize() method should
398 **   immediately return a copy of the xToken() return value. Or, if the
399 **   input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
400 **   if an error occurs with the xTokenize() implementation itself, it
401 **   may abandon the tokenization and return any error code other than
402 **   SQLITE_OK or SQLITE_DONE.
403 **
404 ** SYNONYM SUPPORT
405 **
406 **   Custom tokenizers may also support synonyms. Consider a case in which a
407 **   user wishes to query for a phrase such as "first place". Using the
408 **   built-in tokenizers, the FTS5 query 'first + place' will match instances
409 **   of "first place" within the document set, but not alternative forms
410 **   such as "1st place". In some applications, it would be better to match
411 **   all instances of "first place" or "1st place" regardless of which form
412 **   the user specified in the MATCH query text.
413 **
414 **   There are several ways to approach this in FTS5:
415 **
416 **   <ol><li> By mapping all synonyms to a single token. In this case, using
417 **            the above example, this means that the tokenizer returns the
418 **            same token for inputs "first" and "1st". Say that token is in
419 **            fact "first", so that when the user inserts the document "I won
420 **            1st place" entries are added to the index for tokens "i", "won",
421 **            "first" and "place". If the user then queries for '1st + place',
422 **            the tokenizer substitutes "first" for "1st" and the query works
423 **            as expected.
424 **
425 **       <li> By querying the index for all synonyms of each query term
426 **            separately. In this case, when tokenizing query text, the
427 **            tokenizer may provide multiple synonyms for a single term
428 **            within the document. FTS5 then queries the index for each
429 **            synonym individually. For example, faced with the query:
430 **
431 **   <codeblock>
432 **     ... MATCH 'first place'</codeblock>
433 **
434 **            the tokenizer offers both "1st" and "first" as synonyms for the
435 **            first token in the MATCH query and FTS5 effectively runs a query
436 **            similar to:
437 **
438 **   <codeblock>
439 **     ... MATCH '(first OR 1st) place'</codeblock>
440 **
441 **            except that, for the purposes of auxiliary functions, the query
442 **            still appears to contain just two phrases - "(first OR 1st)"
443 **            being treated as a single phrase.
444 **
445 **       <li> By adding multiple synonyms for a single term to the FTS index.
446 **            Using this method, when tokenizing document text, the tokenizer
447 **            provides multiple synonyms for each token. So that when a
448 **            document such as "I won first place" is tokenized, entries are
449 **            added to the FTS index for "i", "won", "first", "1st" and
450 **            "place".
451 **
452 **            This way, even if the tokenizer does not provide synonyms
453 **            when tokenizing query text (it should not - to do so would be
454 **            inefficient), it doesn't matter if the user queries for
455 **            'first + place' or '1st + place', as there are entries in the
456 **            FTS index corresponding to both forms of the first token.
457 **   </ol>
458 **
459 **   Whether it is parsing document or query text, any call to xToken that
460 **   specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
461 **   is considered to supply a synonym for the previous token. For example,
462 **   when parsing the document "I won first place", a tokenizer that supports
463 **   synonyms would call xToken() 5 times, as follows:
464 **
465 **   <codeblock>
466 **       xToken(pCtx, 0, "i",                      1,  0,  1);
467 **       xToken(pCtx, 0, "won",                    3,  2,  5);
468 **       xToken(pCtx, 0, "first",                  5,  6, 11);
469 **       xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3,  6, 11);
470 **       xToken(pCtx, 0, "place",                  5, 12, 17);
471 **</codeblock>
472 **
473 **   It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
474 **   xToken() is called. Multiple synonyms may be specified for a single token
475 **   by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
476 **   There is no limit to the number of synonyms that may be provided for a
477 **   single token.
478 **
479 **   In many cases, method (1) above is the best approach. It does not add
480 **   extra data to the FTS index or require FTS5 to query for multiple terms,
481 **   so it is efficient in terms of disk space and query speed. However, it
482 **   does not support prefix queries very well. If, as suggested above, the
483 **   token "first" is substituted for "1st" by the tokenizer, then the query:
484 **
485 **   <codeblock>
486 **     ... MATCH '1s*'</codeblock>
487 **
488 **   will not match documents that contain the token "1st" (as the tokenizer
489 **   will probably not map "1s" to any prefix of "first").
490 **
491 **   For full prefix support, method (3) may be preferred. In this case,
492 **   because the index contains entries for both "first" and "1st", prefix
493 **   queries such as 'fi*' or '1s*' will match correctly. However, because
494 **   extra entries are added to the FTS index, this method uses more space
495 **   within the database.
496 **
497 **   Method (2) offers a midpoint between (1) and (3). Using this method,
498 **   a query such as '1s*' will match documents that contain the literal
499 **   token "1st", but not "first" (assuming the tokenizer is not able to
500 **   provide synonyms for prefixes). However, a non-prefix query like '1st'
501 **   will match against "1st" and "first". This method does not require
502 **   extra disk space, as no extra entries are added to the FTS index.
503 **   On the other hand, it may require more CPU cycles to run MATCH queries,
504 **   as separate queries of the FTS index are required for each synonym.
505 **
506 **   When using methods (2) or (3), it is important that the tokenizer only
507 **   provide synonyms when tokenizing document text (method (3)) or query
508 **   text (method (2)), not both. Doing so will not cause any errors, but is
509 **   inefficient.
510 */
511 typedef struct Fts5Tokenizer Fts5Tokenizer;     /* Tokenizer instance handle produced by xCreate() */
512 typedef struct fts5_tokenizer fts5_tokenizer;
513 struct fts5_tokenizer {                         /* Tokenizer methods; every member must be non-NULL */
514   int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
515   void (*xDelete)(Fts5Tokenizer*);              /* Delete a handle allocated by xCreate() */
516   int (*xTokenize)(Fts5Tokenizer*,
517       void *pCtx,           /* Passed through as 1st argument to xToken() */
518       int flags,            /* Mask of FTS5_TOKENIZE_* flags */
519       const char *pText, int nText,   /* Input text; nText bytes, may lack a nul-terminator */
520       int (*xToken)(
521         void *pCtx,         /* Copy of 2nd argument to xTokenize() */
522         int tflags,         /* Mask of FTS5_TOKEN_* flags */
523         const char *pToken, /* Pointer to buffer containing token */
524         int nToken,         /* Size of token in bytes */
525         int iStart,         /* Byte offset of token within input text */
526         int iEnd            /* Byte offset of end of token within input text */
527       )
528   );
529 };
530 
531 /* Flags that may be passed as the third argument to xTokenize() */
532 #define FTS5_TOKENIZE_QUERY     0x0001      /* Tokenizing MATCH query text */
533 #define FTS5_TOKENIZE_PREFIX    0x0002      /* With QUERY: text is followed by "*" */
534 #define FTS5_TOKENIZE_DOCUMENT  0x0004      /* Document inserted into or removed from table */
535 #define FTS5_TOKENIZE_AUX       0x0008      /* Request made by an auxiliary function */
536 
537 /* Flags that may be passed by the tokenizer implementation back to FTS5
538 ** as the second argument to the supplied xToken callback. */
539 #define FTS5_TOKEN_COLOCATED    0x0001      /* Same position as prev. token */
540 
541 /*
542 ** END OF CUSTOM TOKENIZERS
543 *************************************************************************/
544 
545 /*************************************************************************
546 ** FTS5 EXTENSION REGISTRATION API
547 */
548 typedef struct fts5_api fts5_api;
549 struct fts5_api {
550   int iVersion;                   /* Currently always set to 2 */
551 
552   /* Create a new tokenizer */
553   int (*xCreateTokenizer)(
554     fts5_api *pApi,
555     const char *zName,            /* Name of the tokenizer */
556     void *pContext,               /* Passed as first argument to xCreate() */
557     fts5_tokenizer *pTokenizer,   /* Populated tokenizer method table */
558     void (*xDestroy)(void*)       /* presumably destructor for pContext - TODO confirm */
559   );
560 
561   /* Find an existing tokenizer */
562   int (*xFindTokenizer)(
563     fts5_api *pApi,
564     const char *zName,            /* Name of the tokenizer to look up */
565     void **ppContext,             /* presumably OUT: the registered pContext - TODO confirm */
566     fts5_tokenizer *pTokenizer    /* presumably OUT: tokenizer methods - TODO confirm */
567   );
568 
569   /* Create a new auxiliary function */
570   int (*xCreateFunction)(
571     fts5_api *pApi,
572     const char *zName,            /* Name of the auxiliary function */
573     void *pContext,               /* Context pointer; xUserData() returns a copy */
574     fts5_extension_function xFunction,  /* Function implementation */
575     void (*xDestroy)(void*)       /* presumably destructor for pContext - TODO confirm */
576   );
577 };
578 
579 /*
580 ** END OF REGISTRATION API
581 *************************************************************************/
582 
583 #ifdef __cplusplus
584 }  /* end of the 'extern "C"' block */
585 #endif
586 
587 #endif /* _FTS5_H */
588 
589 #line 1 "fts5Int.h"
590 /*
591 ** 2014 May 31
592 **
593 ** The author disclaims copyright to this source code.  In place of
594 ** a legal notice, here is a blessing:
595 **
596 **    May you do good and not evil.
597 **    May you find forgiveness for yourself and forgive others.
598 **    May you share freely, never taking more than you give.
599 **
600 ******************************************************************************
601 **
602 */
603 #ifndef _FTS5INT_H
604 #define _FTS5INT_H
605 
606 /* #include "fts5.h" */
607 #include "sqlite3ext.h"
608 SQLITE_EXTENSION_INIT1
609 
610 #include <string.h>
611 #include <assert.h>
612 
613 #ifndef SQLITE_AMALGAMATION
614 /* Short aliases for basic integer types. */
615 typedef unsigned char  u8;
616 typedef unsigned int   u32;
617 typedef unsigned short u16;
618 typedef short i16;
619 typedef sqlite3_int64 i64;
620 typedef sqlite3_uint64 u64;
621 /* ArraySize(x): element count of x, which must be a true array (not a pointer). */
622 #ifndef ArraySize
623 # define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0])))
624 #endif
625 /* testcase() expands to nothing in this configuration. */
626 #define testcase(x)
627 /* ALWAYS(X)/NEVER(X) wrap conditions expected to be always true / never true. */
628 #if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST)
629 # define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1
630 #endif
631 #if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS)
632 # define ALWAYS(X)      (1)                     /* checks omitted: X is not evaluated */
633 # define NEVER(X)       (0)                     /* checks omitted: X is not evaluated */
634 #elif !defined(NDEBUG)
635 # define ALWAYS(X)      ((X)?1:(assert(0),0))   /* assert() enabled: fail if X is false */
636 # define NEVER(X)       ((X)?(assert(0),1):0)   /* assert() enabled: fail if X is true */
637 #else
638 # define ALWAYS(X)      (X)                     /* NDEBUG: plain test of X */
639 # define NEVER(X)       (X)                     /* NDEBUG: plain test of X */
640 #endif
641 /* NB: MIN() and MAX() evaluate their arguments more than once. */
642 #define MIN(x,y) (((x) < (y)) ? (x) : (y))
643 #define MAX(x,y) (((x) > (y)) ? (x) : (y))
644 
645 /*
646 ** Constants for the largest and smallest possible 64-bit signed integers.
647 */
648 # define LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
649 # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64)
650 
651 #endif /* !SQLITE_AMALGAMATION */
652 
653 /* Truncate very long tokens to this many bytes. Hard limit is
654 ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset
655 ** field that occurs at the start of each leaf page (see fts5_index.c). */
656 #define FTS5_MAX_TOKEN_SIZE 32768
657 
658 /*
659 ** Maximum number of prefix indexes on a single FTS5 table. This must be
660 ** less than 32. If it is set to anything larger than that, an #error
661 ** directive in fts5_index.c will cause the build to fail.
662 */
663 #define FTS5_MAX_PREFIX_INDEXES 31
664 
665 /*
666 ** Maximum segments permitted in a single index
667 */
668 #define FTS5_MAX_SEGMENT 2000
669 
670 #define FTS5_DEFAULT_NEARDIST 10        /* presumably the default NEAR() distance - TODO confirm */
671 #define FTS5_DEFAULT_RANK     "bm25"    /* presumably the default rank function name - TODO confirm */
672 
673 /* Name of rank and rowid columns */
674 #define FTS5_RANK_NAME "rank"
675 #define FTS5_ROWID_NAME "rowid"
676 
677 #ifdef SQLITE_DEBUG
678 # define FTS5_CORRUPT sqlite3Fts5Corrupt()    /* Debug builds: error code comes from a function call */
679 static int sqlite3Fts5Corrupt(void);          /* Declared here; the definition is not in this part of the file */
680 #else
681 # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB     /* Otherwise: the constant error code */
682 #endif
683 
684 /*
685 ** The assert_nc() macro is similar to the assert() macro, except that it
686 ** is used for assert() conditions that are true only if it can be
687 ** guaranteed that the database is not corrupt.
688 */
689 #ifdef SQLITE_DEBUG
690 extern int sqlite3_fts5_may_be_corrupt;
691 # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x))   /* check skipped when the flag is non-zero */
692 #else
693 # define assert_nc(x) assert(x)   /* NDEBUG is defined at the top of this file whenever SQLITE_DEBUG is not */
694 #endif
695 
696 /*
697 ** A version of memcmp() that does not cause asan errors if one of the pointer
698 ** parameters is NULL and the number of bytes to compare is zero. When n<=0
699 ** the result is 0 and memcmp() is not called. Argument n is evaluated twice. */
700 #define fts5Memcmp(s1, s2, n) ((n)<=0 ? 0 : memcmp((s1), (s2), (n)))
701 
702 /* Mark a function parameter as unused, to suppress nuisance compiler
703 ** warnings. Each macro is only defined if not already defined. */
704 #ifndef UNUSED_PARAM
705 # define UNUSED_PARAM(X)  (void)(X)
706 #endif
707 
708 #ifndef UNUSED_PARAM2
709 # define UNUSED_PARAM2(X, Y)  (void)(X), (void)(Y)   /* two-parameter form; expands to a comma expression */
710 #endif
711 
712 typedef struct Fts5Global Fts5Global;   /* Not defined in this part of the file */
713 typedef struct Fts5Colset Fts5Colset;
714 
715 /* If a NEAR() clump or phrase may only match a specific set of columns,
716 ** then an object of the following type is used to record the set of columns.
717 ** Each entry in the aiCol[] array is a column that may be matched.
718 **
719 ** This object is used by fts5_expr.c and fts5_index.c.
720 */
721 struct Fts5Colset {
722   int nCol;                     /* presumably the number of entries in aiCol[] - TODO confirm */
723   int aiCol[1];                 /* Columns that may be matched. NOTE(review): looks like a variable-length trailing array - confirm */
724 };
725 
726 
727 
728 /**************************************************************************
729 ** Interface to code in fts5_config.c. fts5_config.c contains code
730 ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement.
731 */
732 
typedef struct Fts5Config Fts5Config;

/*
** An instance of the following structure encodes all information that can
** be gleaned from the CREATE VIRTUAL TABLE statement.
**
** And all information loaded from the %_config table.
**
** nAutomerge:
**   The minimum number of segments that an auto-merge operation should
**   attempt to merge together. A value of 1 sets the object to use the
**   compile time default. Zero disables auto-merge altogether.
**
** zContent:
**   The content table.
**   NOTE(review): not documented further here - confirm in fts5_config.c
**   how this relates to the eContent setting.
**
** zContentRowid:
**   The value of the content_rowid= option, if one was specified. Or
**   the string "rowid" otherwise. This text is not quoted - if it is
**   used as part of an SQL statement it needs to be quoted appropriately.
**
** zContentExprlist:
**   NOTE(review): not documented here - confirm its contents and use in
**   fts5_config.c.
**
** pzErrmsg:
**   This exists in order to allow the fts5_index.c module to return a
**   decent error message if it encounters a file-format version it does
**   not understand.
**
** bColumnsize:
**   True if the %_docsize table is created.
**
** bPrefixIndex:
**   This is only used for debugging. If set to false, any prefix indexes
**   are ignored. This value is configured using:
**
**       INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex);
**
**   The field only exists in SQLITE_DEBUG builds.
*/
struct Fts5Config {
  sqlite3 *db;                    /* Database handle */
  char *zDb;                      /* Database holding FTS index (e.g. "main") */
  char *zName;                    /* Name of FTS index */
  int nCol;                       /* Number of columns */
  char **azCol;                   /* Column names */
  u8 *abUnindexed;                /* True for unindexed columns */
  int nPrefix;                    /* Number of prefix indexes */
  int *aPrefix;                   /* Sizes in bytes of nPrefix prefix indexes */
  int eContent;                   /* An FTS5_CONTENT value */
  char *zContent;                 /* content table */
  char *zContentRowid;            /* "content_rowid=" option value */
  int bColumnsize;                /* "columnsize=" option value (dflt==1) */
  int eDetail;                    /* FTS5_DETAIL_XXX value */
  char *zContentExprlist;         /* See NOTE(review) in the header comment */
  Fts5Tokenizer *pTok;            /* Tokenizer object */
  fts5_tokenizer *pTokApi;        /* Tokenizer API structure */
  int bLock;                      /* True when table is preparing statement */
  int ePattern;                   /* FTS5_PATTERN_XXX constant */

  /* Values loaded from the %_config table */
  int iCookie;                    /* Incremented when %_config is modified */
  int pgsz;                       /* Approximate page size used in %_data */
  int nAutomerge;                 /* 'automerge' setting */
  int nCrisisMerge;               /* Maximum allowed segments per level */
  int nUsermerge;                 /* 'usermerge' setting */
  int nHashSize;                  /* Bytes of memory for in-memory hash */
  char *zRank;                    /* Name of rank function */
  char *zRankArgs;                /* Arguments to rank function */

  /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */
  char **pzErrmsg;

#ifdef SQLITE_DEBUG
  int bPrefixIndex;               /* True to use prefix-indexes */
#endif
};
807 
/* Current expected value of %_config table 'version' field */
#define FTS5_CURRENT_VERSION  4

/* Values for Fts5Config.eContent */
#define FTS5_CONTENT_NORMAL   0
#define FTS5_CONTENT_NONE     1
#define FTS5_CONTENT_EXTERNAL 2

/* Values for Fts5Config.eDetail */
#define FTS5_DETAIL_FULL      0
#define FTS5_DETAIL_NONE      1
#define FTS5_DETAIL_COLUMNS   2

/* Values for Fts5Config.ePattern. The LIKE and GLOB values are chosen to
** be numerically equal to the corresponding SQLITE_INDEX_CONSTRAINT_XXX
** constants, so they must not be renumbered independently. */
#define FTS5_PATTERN_NONE     0
#define FTS5_PATTERN_LIKE     65  /* matches SQLITE_INDEX_CONSTRAINT_LIKE */
#define FTS5_PATTERN_GLOB     66  /* matches SQLITE_INDEX_CONSTRAINT_GLOB */
822 
/*
** Create an Fts5Config object from the arguments of a CREATE VIRTUAL TABLE
** statement, and free such an object.
**
** NOTE(review): the parameters of sqlite3Fts5ConfigParse() are unnamed in
** this declaration. Presumably they are (global object, database handle,
** argument count, argument array, OUT: new config, OUT: error message) -
** confirm against the definition in fts5_config.c.
*/
static int sqlite3Fts5ConfigParse(
    Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char**
);
static void sqlite3Fts5ConfigFree(Fts5Config*);

/* NOTE(review): presumably declares the virtual table schema to SQLite
** based on pConfig - confirm against the definition. */
static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig);

/*
** Tokenize the nText bytes of text at pText. xToken is a callback that
** receives pCtx as its first argument.
*/
static int sqlite3Fts5Tokenize(
  Fts5Config *pConfig,            /* FTS5 Configuration object */
  int flags,                      /* FTS5_TOKENIZE_* flags */
  const char *pText, int nText,   /* Text to tokenize */
  void *pCtx,                     /* Context passed to xToken() */
  int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
);

/* Takes a writable string. NOTE(review): presumably removes SQL-style
** quoting from z in place - confirm against the definition. */
static void sqlite3Fts5Dequote(char *z);

/* Load the contents of the %_config table */
static int sqlite3Fts5ConfigLoad(Fts5Config*, int);

/* Set the value of a single config attribute */
static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*);

/* NOTE(review): takes one input string and two char** outputs; presumably
** these correspond to Fts5Config.zRank and Fts5Config.zRankArgs - confirm. */
static int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
847 
848 /*
849 ** End of interface to code in fts5_config.c.
850 **************************************************************************/
851 
852 /**************************************************************************
853 ** Interface to code in fts5_buffer.c.
854 */
855 
856 /*
857 ** Buffer object for the incremental building of string data.
858 */
859 typedef struct Fts5Buffer Fts5Buffer;
860 struct Fts5Buffer {
861   u8 *p;
862   int n;
863   int nSpace;
864 };
865 
866 static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32);
867 static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64);
868 static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*);
869 static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*);
870 static void sqlite3Fts5BufferFree(Fts5Buffer*);
871 static void sqlite3Fts5BufferZero(Fts5Buffer*);
872 static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*);
873 static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...);
874 
875 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...);
876 
/*
** Short aliases for the sqlite3Fts5BufferXXX() functions.
**
** fts5BufferAppendVarint() casts its third argument to i64. The argument
** is pasted after the cast without parentheses, so for a compound
** expression the cast binds to the first operand only (e.g. "n*2" becomes
** "(i64)n*2"). Callers may depend on this, so do not add parentheses
** without auditing every use.
*/
#define fts5BufferZero(x)             sqlite3Fts5BufferZero(x)
#define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c)
#define fts5BufferFree(a)             sqlite3Fts5BufferFree(a)
#define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d)
#define fts5BufferSet(a,b,c,d)        sqlite3Fts5BufferSet(a,b,c,d)

/*
** Ensure that buffer pBuf has room for at least nn more bytes.
**
** Evaluates to 0 if (pBuf->n + nn) already fits within pBuf->nSpace (the
** comparison is done on u32 values). Otherwise it evaluates to the return
** value of sqlite3Fts5BufferSize(pRc, pBuf, nn + pBuf->n).
**
** pBuf and nn are each evaluated more than once, so they must be free of
** side effects.
*/
#define fts5BufferGrow(pRc,pBuf,nn) ( \
  (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \
    sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \
)
887 
/* Write and decode big-endian 32-bit integer values. Put32() takes the
** destination buffer and the value; Get32() takes the source buffer and
** returns the decoded value. */
static void sqlite3Fts5Put32(u8*, int);
static int sqlite3Fts5Get32(const u8*);
891 
/*
** Split a 64-bit position value into its two components. A position is
** stored as (iCol<<32) + iOffset (see Fts5PoslistReader.iPos):
**
**   FTS5_POS2COLUMN(iPos)   the column number (upper 32 bits)
**   FTS5_POS2OFFSET(iPos)   the token offset (lower 31 bits)
**
** The argument and the whole expansion are parenthesized so that the
** macros give the expected result for any argument expression, including
** ones that use operators binding less tightly than >> or & (such as |
** or ?:).
*/
#define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32))
#define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0x7FFFFFFF))
894 
/*
** Iterator over a position list stored in a byte buffer.
**
** The first three fields are private to the sqlite3Fts5PoslistIterXXX()
** functions, bFlag is free for the client to use, and bEof/iPos are the
** iterator outputs. iPos can be split into its components with the
** FTS5_POS2COLUMN() and FTS5_POS2OFFSET() macros.
*/
typedef struct Fts5PoslistReader Fts5PoslistReader;
struct Fts5PoslistReader {
  /* Variables used only by sqlite3Fts5PoslistIterXXX() functions. */
  const u8 *a;                    /* Position list to iterate through */
  int n;                          /* Size of buffer at a[] in bytes */
  int i;                          /* Current offset in a[] */

  u8 bFlag;                       /* For client use (any custom purpose) */

  /* Output variables */
  u8 bEof;                        /* Set to true at EOF */
  i64 iPos;                       /* (iCol<<32) + iPos */
};

/* Initialize iterator pIter to read the n byte poslist at a[]. */
static int sqlite3Fts5PoslistReaderInit(
  const u8 *a, int n,             /* Poslist buffer to iterate through */
  Fts5PoslistReader *pIter        /* Iterator object to initialize */
);

/* NOTE(review): presumably advances the iterator to the next position and
** updates bEof/iPos - confirm against the definition. */
static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*);
913 
/*
** State object used when appending positions to a poslist held in an
** Fts5Buffer.
**
** NOTE(review): iPrev is presumably the most recently appended position,
** kept so the next one can be delta-encoded - confirm in fts5_buffer.c.
*/
typedef struct Fts5PoslistWriter Fts5PoslistWriter;
struct Fts5PoslistWriter {
  i64 iPrev;
};
static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64);
static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64);

/*
** Read the next entry from the n byte poslist at a[]. *pi is the current
** byte offset within a[] and *piOff the current position value; both are
** read and updated by the call.
*/
static int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
);
926 
/* Malloc utility. NOTE(review): judging by the names, MallocZero()
** presumably returns nByte bytes of zeroed memory and Strndup() a copy of
** the first nIn bytes of pIn, with errors reported via *pRc - confirm
** against the definitions. */
static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte);
static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn);

/* Character set tests (like isspace(), isalpha() etc.) */
static int sqlite3Fts5IsBareword(char t);


/* Bucket of terms object used by the integrity-check in offsets=0 mode.
** Fts5Termset is opaque outside of its implementation; it is created by
** TermsetNew() and destroyed by TermsetFree(). */
typedef struct Fts5Termset Fts5Termset;
static int sqlite3Fts5TermsetNew(Fts5Termset**);
static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent);
static void sqlite3Fts5TermsetFree(Fts5Termset*);
940 
941 /*
942 ** End of interface to code in fts5_buffer.c.
943 **************************************************************************/
944 
945 /**************************************************************************
** Interface to code in fts5_index.c. fts5_index.c contains code
** to access the data stored in the %_data table.
948 */
949 
/* Fts5Index is opaque outside of fts5_index.c. */
typedef struct Fts5Index Fts5Index;
typedef struct Fts5IndexIter Fts5IndexIter;

/*
** Publicly visible part of an iterator returned by sqlite3Fts5IndexQuery().
**
** NOTE(review): field meanings below are inferred from their names and
** types - confirm against fts5_index.c.
*/
struct Fts5IndexIter {
  i64 iRowid;                     /* Presumably the rowid of the current entry */
  const u8 *pData;                /* Presumably data for the current entry */
  int nData;                      /* Presumably the size of pData in bytes */
  u8 bEof;                        /* Read by sqlite3Fts5IterEof() */
};

/* Evaluates to the bEof flag of iterator x. */
#define sqlite3Fts5IterEof(x) ((x)->bEof)
961 
962 /*
963 ** Values used as part of the flags argument passed to IndexQuery().
964 */
965 #define FTS5INDEX_QUERY_PREFIX     0x0001   /* Prefix query */
966 #define FTS5INDEX_QUERY_DESC       0x0002   /* Docs in descending rowid order */
967 #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004   /* Do not use prefix index */
968 #define FTS5INDEX_QUERY_SCAN       0x0008   /* Scan query (fts5vocab) */
969 
970 /* The following are used internally by the fts5_index.c module. They are
971 ** defined here only to make it easier to avoid clashes with the flags
972 ** above. */
973 #define FTS5INDEX_QUERY_SKIPEMPTY  0x0010
974 #define FTS5INDEX_QUERY_NOOUTPUT   0x0020
975 
976 /*
977 ** Create/destroy an Fts5Index object.
978 */
979 static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**);
980 static int sqlite3Fts5IndexClose(Fts5Index *p);
981 
982 /*
983 ** Return a simple checksum value based on the arguments.
984 */
985 static u64 sqlite3Fts5IndexEntryCksum(
986   i64 iRowid,
987   int iCol,
988   int iPos,
989   int iIdx,
990   const char *pTerm,
991   int nTerm
992 );
993 
994 /*
995 ** Argument p points to a buffer containing utf-8 text that is n bytes in
996 ** size. Return the number of bytes in the nChar character prefix of the
997 ** buffer, or 0 if there are less than nChar characters in total.
998 */
999 static int sqlite3Fts5IndexCharlenToBytelen(
1000   const char *p,
1001   int nByte,
1002   int nChar
1003 );
1004 
1005 /*
1006 ** Open a new iterator to iterate though all rowids that match the
1007 ** specified token or token prefix.
1008 */
1009 static int sqlite3Fts5IndexQuery(
1010   Fts5Index *p,                   /* FTS index to query */
1011   const char *pToken, int nToken, /* Token (or prefix) to query for */
1012   int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
1013   Fts5Colset *pColset,            /* Match these columns only */
1014   Fts5IndexIter **ppIter          /* OUT: New iterator object */
1015 );
1016 
1017 /*
1018 ** The various operations on open token or token prefix iterators opened
1019 ** using sqlite3Fts5IndexQuery().
1020 */
1021 static int sqlite3Fts5IterNext(Fts5IndexIter*);
1022 static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch);
1023 
1024 /*
1025 ** Close an iterator opened by sqlite3Fts5IndexQuery().
1026 */
1027 static void sqlite3Fts5IterClose(Fts5IndexIter*);
1028 
1029 /*
1030 ** Close the reader blob handle, if it is open.
1031 */
1032 static void sqlite3Fts5IndexCloseReader(Fts5Index*);
1033 
1034 /*
1035 ** This interface is used by the fts5vocab module.
1036 */
1037 static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*);
1038 static int sqlite3Fts5IterNextScan(Fts5IndexIter*);
1039 static void *sqlite3Fts5StructureRef(Fts5Index*);
1040 static void sqlite3Fts5StructureRelease(void*);
1041 static int sqlite3Fts5StructureTest(Fts5Index*, void*);
1042 
1043 
1044 /*
1045 ** Insert or remove data to or from the index. Each time a document is
1046 ** added to or removed from the index, this function is called one or more
1047 ** times.
1048 **
1049 ** For an insert, it must be called once for each token in the new document.
1050 ** If the operation is a delete, it must be called (at least) once for each
1051 ** unique token in the document with an iCol value less than zero. The iPos
1052 ** argument is ignored for a delete.
1053 */
1054 static int sqlite3Fts5IndexWrite(
1055   Fts5Index *p,                   /* Index to write to */
1056   int iCol,                       /* Column token appears in (-ve -> delete) */
1057   int iPos,                       /* Position of token within column */
1058   const char *pToken, int nToken  /* Token to add or remove to or from index */
1059 );
1060 
1061 /*
1062 ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to
1063 ** document iDocid.
1064 */
1065 static int sqlite3Fts5IndexBeginWrite(
1066   Fts5Index *p,                   /* Index to write to */
1067   int bDelete,                    /* True if current operation is a delete */
1068   i64 iDocid                      /* Docid to add or remove data from */
1069 );
1070 
1071 /*
1072 ** Flush any data stored in the in-memory hash tables to the database.
1073 ** Also close any open blob handles.
1074 */
1075 static int sqlite3Fts5IndexSync(Fts5Index *p);
1076 
1077 /*
1078 ** Discard any data stored in the in-memory hash tables. Do not write it
1079 ** to the database. Additionally, assume that the contents of the %_data
1080 ** table may have changed on disk. So any in-memory caches of %_data
1081 ** records must be invalidated.
1082 */
1083 static int sqlite3Fts5IndexRollback(Fts5Index *p);
1084 
1085 /*
1086 ** Get or set the "averages" values.
1087 */
1088 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize);
1089 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int);
1090 
1091 /*
1092 ** Functions called by the storage module as part of integrity-check.
1093 */
1094 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum);
1095 
1096 /*
1097 ** Called during virtual module initialization to register UDF
1098 ** fts5_decode() with SQLite
1099 */
1100 static int sqlite3Fts5IndexInit(sqlite3*);
1101 
1102 static int sqlite3Fts5IndexSetCookie(Fts5Index*, int);
1103 
1104 /*
1105 ** Return the total number of entries read from the %_data table by
1106 ** this connection since it was created.
1107 */
1108 static int sqlite3Fts5IndexReads(Fts5Index *p);
1109 
1110 static int sqlite3Fts5IndexReinit(Fts5Index *p);
1111 static int sqlite3Fts5IndexOptimize(Fts5Index *p);
1112 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge);
1113 static int sqlite3Fts5IndexReset(Fts5Index *p);
1114 
1115 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p);
1116 
1117 /*
1118 ** End of interface to code in fts5_index.c.
1119 **************************************************************************/
1120 
1121 /**************************************************************************
1122 ** Interface to code in fts5_varint.c.
1123 */
/* Varint encode/decode primitives. NOTE(review): the int/u8 return values
** are presumably the number of bytes read or written - the use of the
** return value in fts5FastGetVarint32() below is consistent with that for
** sqlite3Fts5GetVarint32(); confirm the others in fts5_varint.c. */
static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v);
static int sqlite3Fts5GetVarintLen(u32 iVal);
static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*);
static int sqlite3Fts5PutVarint(unsigned char *p, u64 v);

/* fts5GetVarint32(a,b) takes the address of b and casts it to (u32*), so b
** must be an lvalue that can safely be written as a u32. */
#define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b)
#define fts5GetVarint    sqlite3Fts5GetVarint

/*
** Read a 32-bit varint from a[iOff] into nVal, advancing iOff past it.
**
** The first byte is loaded directly. Only if its 0x80 bit is set is iOff
** rewound and the value decoded by fts5GetVarint32(), whose return value
** is then added to iOff.
**
** This macro expands to a braced compound statement, not an expression.
** iOff and nVal must be modifiable lvalues, and nVal is subject to the
** same requirement as argument b of fts5GetVarint32().
*/
#define fts5FastGetVarint32(a, iOff, nVal) {      \
  nVal = (a)[iOff++];                             \
  if( nVal & 0x80 ){                              \
    iOff--;                                       \
    iOff += fts5GetVarint32(&(a)[iOff], nVal);    \
  }                                               \
}
1139 
1140 
1141 /*
1142 ** End of interface to code in fts5_varint.c.
1143 **************************************************************************/
1144 
1145 
1146 /**************************************************************************
1147 ** Interface to code in fts5_main.c.
1148 */
1149 
1150 /*
1151 ** Virtual-table object.
1152 */
1153 typedef struct Fts5Table Fts5Table;
1154 struct Fts5Table {
1155   sqlite3_vtab base;              /* Base class used by SQLite core */
1156   Fts5Config *pConfig;            /* Virtual table configuration */
1157   Fts5Index *pIndex;              /* Full-text index */
1158 };
1159 
1160 static int sqlite3Fts5GetTokenizer(
1161   Fts5Global*,
1162   const char **azArg,
1163   int nArg,
1164   Fts5Config*,
1165   char **pzErr
1166 );
1167 
1168 static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64);
1169 
1170 static int sqlite3Fts5FlushToDisk(Fts5Table*);
1171 
1172 /*
** End of interface to code in fts5_main.c.
1174 **************************************************************************/
1175 
1176 /**************************************************************************
1177 ** Interface to code in fts5_hash.c.
1178 */
/* Fts5Hash is opaque outside of fts5_hash.c. */
typedef struct Fts5Hash Fts5Hash;

/*
** Create a hash table, free a hash table.
*/
static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize);
static void sqlite3Fts5HashFree(Fts5Hash*);

/*
** Add an entry to the hash table. The iCol/iPos/pToken/nToken arguments
** are documented identically to those of sqlite3Fts5IndexWrite().
** NOTE(review): the role of bByte is not documented here - confirm in
** fts5_hash.c.
*/
static int sqlite3Fts5HashWrite(
  Fts5Hash*,
  i64 iRowid,                     /* Rowid for this entry */
  int iCol,                       /* Column token appears in (-ve -> delete) */
  int iPos,                       /* Position of token within column */
  char bByte,
  const char *pToken, int nToken  /* Token to add or remove to or from index */
);

/*
** Empty (but do not delete) a hash table.
*/
static void sqlite3Fts5HashClear(Fts5Hash*);

/*
** Look up term pTerm/nTerm, returning its doclist via the output
** parameters.
** NOTE(review): the meaning of nPre and the ownership of *ppObj are not
** visible here - confirm in fts5_hash.c.
*/
static int sqlite3Fts5HashQuery(
  Fts5Hash*,                      /* Hash table to query */
  int nPre,
  const char *pTerm, int nTerm,   /* Query term */
  void **ppObj,                   /* OUT: Pointer to doclist for pTerm */
  int *pnDoclist                  /* OUT: Size of doclist in bytes */
);

/*
** Scan interface: ScanInit() starts a scan for a term prefix, ScanEof()
** tests for the end of the scan, ScanEntry() reads the current entry and
** ScanNext() advances.
*/
static int sqlite3Fts5HashScanInit(
  Fts5Hash*,                      /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
);
static void sqlite3Fts5HashScanNext(Fts5Hash*);
static int sqlite3Fts5HashScanEof(Fts5Hash*);
static void sqlite3Fts5HashScanEntry(Fts5Hash *,
  const char **pzTerm,            /* OUT: term (nul-terminated) */
  const u8 **ppDoclist,           /* OUT: pointer to doclist */
  int *pnDoclist                  /* OUT: size of doclist in bytes */
);
1220 
1221 
1222 /*
1223 ** End of interface to code in fts5_hash.c.
1224 **************************************************************************/
1225 
1226 /**************************************************************************
** Interface to code in fts5_storage.c. fts5_storage.c contains
** code to access the data stored in the %_content and %_docsize tables.
1229 */
1230 
/* Statement identifiers, passed as the eStmt argument of
** sqlite3Fts5StorageStmt() and sqlite3Fts5StorageStmtRelease(). */
#define FTS5_STMT_SCAN_ASC  0     /* SELECT rowid, * FROM ... ORDER BY 1 ASC */
#define FTS5_STMT_SCAN_DESC 1     /* SELECT rowid, * FROM ... ORDER BY 1 DESC */
#define FTS5_STMT_LOOKUP    2     /* SELECT rowid, * FROM ... WHERE rowid=? */

/* Fts5Storage is opaque outside of fts5_storage.c. */
typedef struct Fts5Storage Fts5Storage;

/* Open, close and rename a storage object. NOTE(review): the unnamed int
** argument of StorageOpen() is presumably a "create" flag, by analogy with
** bCreate in sqlite3Fts5IndexOpen() - confirm. */
static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**);
static int sqlite3Fts5StorageClose(Fts5Storage *p);
static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName);

/* Create or drop the database tables that back an FTS5 table. */
static int sqlite3Fts5DropAll(Fts5Config*);
static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **);

/* Row-level modifications. */
static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**);
static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*);
static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64);

static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg);

/* Obtain and release one of the FTS5_STMT_XXX statements. */
static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**);
static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);

/* Size and row-count accessors. */
static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg);
static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);

static int sqlite3Fts5StorageSync(Fts5Storage *p);
static int sqlite3Fts5StorageRollback(Fts5Storage *p);

/* NOTE(review): presumably writes a (name, value) pair to the %_config
** table - confirm against the definition. */
static int sqlite3Fts5StorageConfigValue(
    Fts5Storage *p, const char*, sqlite3_value*, int
);

/* Whole-table maintenance operations. See fts5_storage.c for their
** contracts. */
static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p);
static int sqlite3Fts5StorageRebuild(Fts5Storage *p);
static int sqlite3Fts5StorageOptimize(Fts5Storage *p);
static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge);
static int sqlite3Fts5StorageReset(Fts5Storage *p);
1269 
1270 /*
1271 ** End of interface to code in fts5_storage.c.
1272 **************************************************************************/
1273 
1274 
1275 /**************************************************************************
1276 ** Interface to code in fts5_expr.c.
1277 */
/* All of these types except Fts5Token are opaque in this header. */
typedef struct Fts5Expr Fts5Expr;
typedef struct Fts5ExprNode Fts5ExprNode;
typedef struct Fts5Parse Fts5Parse;
typedef struct Fts5Token Fts5Token;
typedef struct Fts5ExprPhrase Fts5ExprPhrase;
typedef struct Fts5ExprNearset Fts5ExprNearset;

/* A token: a pointer/length pair. The text is not nul-terminated, so
** always use n to bound accesses to p. */
struct Fts5Token {
  const char *p;                  /* Token text (not NULL terminated) */
  int n;                          /* Size of buffer p in bytes */
};

/* Parse a MATCH expression. */
static int sqlite3Fts5ExprNew(
  Fts5Config *pConfig,
  int bPhraseToAnd,
  int iCol,                       /* Column on LHS of MATCH operator */
  const char *zExpr,
  Fts5Expr **ppNew,
  char **pzErr
);

/* NOTE(review): presumably builds an expression for a LIKE or GLOB pattern
** (bGlob selecting which) applied to column iCol - confirm against the
** definition. */
static int sqlite3Fts5ExprPattern(
  Fts5Config *pConfig,
  int bGlob,
  int iCol,
  const char *zText,
  Fts5Expr **pp
);

/*
** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc);
**     rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr);
**     rc = sqlite3Fts5ExprNext(pExpr, iMax)
** ){
**   // The document with rowid iRowid matches the expression!
**   i64 iRowid = sqlite3Fts5ExprRowid(pExpr);
** }
**
** NOTE(review): iMin and iMax are presumably rowid bounds for the scan -
** confirm against the definitions.
*/
static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc);
static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax);
static int sqlite3Fts5ExprEof(Fts5Expr*);
static i64 sqlite3Fts5ExprRowid(Fts5Expr*);

static void sqlite3Fts5ExprFree(Fts5Expr*);
static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2);

/* Called during startup to register a UDF with SQLite */
static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*);

/* Accessors for the phrases of an expression and their position lists. */
static int sqlite3Fts5ExprPhraseCount(Fts5Expr*);
static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);

/* Fts5PoslistPopulator is opaque here. An object returned by
** ExprClearPoslists() is accepted by ExprPopulatePoslists(). */
typedef struct Fts5PoslistPopulator Fts5PoslistPopulator;
static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int);
static int sqlite3Fts5ExprPopulatePoslists(
    Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int
);
static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64);

static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**);

static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *);
1341 
1342 /*******************************************
1343 ** The fts5_expr.c API above this point is used by the other hand-written
1344 ** C code in this module. The interfaces below this point are called by
1345 ** the parser code in fts5parse.y.  */
1346 
1347 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...);
1348 
1349 static Fts5ExprNode *sqlite3Fts5ParseNode(
1350   Fts5Parse *pParse,
1351   int eType,
1352   Fts5ExprNode *pLeft,
1353   Fts5ExprNode *pRight,
1354   Fts5ExprNearset *pNear
1355 );
1356 
1357 static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
1358   Fts5Parse *pParse,
1359   Fts5ExprNode *pLeft,
1360   Fts5ExprNode *pRight
1361 );
1362 
1363 static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
1364   Fts5Parse *pParse,
1365   Fts5ExprPhrase *pPhrase,
1366   Fts5Token *pToken,
1367   int bPrefix
1368 );
1369 
1370 static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*);
1371 
1372 static Fts5ExprNearset *sqlite3Fts5ParseNearset(
1373   Fts5Parse*,
1374   Fts5ExprNearset*,
1375   Fts5ExprPhrase*
1376 );
1377 
1378 static Fts5Colset *sqlite3Fts5ParseColset(
1379   Fts5Parse*,
1380   Fts5Colset*,
1381   Fts5Token *
1382 );
1383 
1384 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*);
1385 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*);
1386 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*);
1387 
1388 static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*);
1389 static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*);
1390 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*);
1391 static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p);
1392 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*);
1393 
1394 /*
1395 ** End of interface to code in fts5_expr.c.
1396 **************************************************************************/
1397 
1398 
1399 
1400 /**************************************************************************
1401 ** Interface to code in fts5_aux.c.
1402 */
1403 
/* NOTE(review): presumably registers the built-in auxiliary functions with
** the supplied fts5_api object - confirm in fts5_aux.c. */
static int sqlite3Fts5AuxInit(fts5_api*);
1405 /*
1406 ** End of interface to code in fts5_aux.c.
1407 **************************************************************************/
1408 
1409 /**************************************************************************
1410 ** Interface to code in fts5_tokenizer.c.
1411 */
1412 
/* NOTE(review): presumably registers the built-in tokenizers with the
** supplied fts5_api object - confirm in fts5_tokenizer.c. */
static int sqlite3Fts5TokenizerInit(fts5_api*);

/* Takes a tokenizer's xCreate method and the tokenizer instance.
** NOTE(review): the return value is presumably one of the
** FTS5_PATTERN_XXX constants - confirm in fts5_tokenizer.c. */
static int sqlite3Fts5TokenizerPattern(
    int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
    Fts5Tokenizer *pTok
);
1418 /*
1419 ** End of interface to code in fts5_tokenizer.c.
1420 **************************************************************************/
1421 
1422 /**************************************************************************
1423 ** Interface to code in fts5_vocab.c.
1424 */
1425 
/* NOTE(review): presumably registers the fts5vocab module with database
** handle db - confirm in fts5_vocab.c. */
static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*);
1427 
1428 /*
1429 ** End of interface to code in fts5_vocab.c.
1430 **************************************************************************/
1431 
1432 
1433 /**************************************************************************
1434 ** Interface to automatically generated code in fts5_unicode2.c.
1435 */
/* Codepoint classification and case folding. Argument c is a codepoint
** value. NOTE(review): exact return conventions are defined by the
** generated code in fts5_unicode2.c - confirm there. */
static int sqlite3Fts5UnicodeIsdiacritic(int c);
static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);

/* Unicode general-category helpers. NOTE(review): the u8* arguments are
** presumably per-category flag arrays - confirm in fts5_unicode2.c. */
static int sqlite3Fts5UnicodeCatParse(const char*, u8*);
static int sqlite3Fts5UnicodeCategory(u32 iCode);
static void sqlite3Fts5UnicodeAscii(u8*, u8*);
1442 /*
1443 ** End of interface to code in fts5_unicode2.c.
1444 **************************************************************************/
1445 
1446 #endif
1447 
1448 #line 1 "fts5parse.h"
/* Numeric codes for the terminal symbols (tokens) of the FTS5 query
** grammar. The generated parser source that follows repeats these same
** values inside an "#ifndef FTS5_OR" guard, so the two lists must be kept
** in agreement. */
#define FTS5_OR                               1
#define FTS5_AND                              2
#define FTS5_NOT                              3
#define FTS5_TERM                             4
#define FTS5_COLON                            5
#define FTS5_MINUS                            6
#define FTS5_LCP                              7
#define FTS5_RCP                              8
#define FTS5_STRING                           9
#define FTS5_LP                              10
#define FTS5_RP                              11
#define FTS5_CARET                           12
#define FTS5_COMMA                           13
#define FTS5_PLUS                            14
#define FTS5_STAR                            15
1464 
1465 #line 1 "fts5parse.c"
1466 /* This file is automatically generated by Lemon from input grammar
1467 ** source file "fts5parse.y". */
1468 /*
1469 ** 2000-05-29
1470 **
1471 ** The author disclaims copyright to this source code.  In place of
1472 ** a legal notice, here is a blessing:
1473 **
1474 **    May you do good and not evil.
1475 **    May you find forgiveness for yourself and forgive others.
1476 **    May you share freely, never taking more than you give.
1477 **
1478 *************************************************************************
1479 ** Driver template for the LEMON parser generator.
1480 **
1481 ** The "lemon" program processes an LALR(1) input grammar file, then uses
1482 ** this template to construct a parser.  The "lemon" program inserts text
** at each "%%" line.  Also, any "P-a-r-s-e" identifier prefix (without the
** interstitial "-" characters) contained in this template is changed into
** the value of the %name directive from the grammar.  Otherwise, the content
** of this template is copied straight through into the generated parser
** source file.
1488 **
1489 ** The following is the concatenation of all %include directives from the
1490 ** input grammar file:
1491 */
1492 /************ Begin %include sections from the grammar ************************/
1493 #line 47 "fts5parse.y"
1494 
1495 /* #include "fts5Int.h" */
1496 /* #include "fts5parse.h" */
1497 
1498 /*
1499 ** Disable all error recovery processing in the parser push-down
1500 ** automaton.
1501 */
1502 #define fts5YYNOERRORRECOVERY 1
1503 
1504 /*
1505 ** Make fts5yytestcase() the same as testcase()
1506 */
1507 #define fts5yytestcase(X) testcase(X)
1508 
1509 /*
1510 ** Indicate that sqlite3ParserFree() will never be called with a null
1511 ** pointer.
1512 */
1513 #define fts5YYPARSEFREENOTNULL 1
1514 
1515 /*
1516 ** Alternative datatype for the argument to the malloc() routine passed
1517 ** into sqlite3ParserAlloc().  The default is size_t.
1518 */
1519 #define fts5YYMALLOCARGTYPE  u64
1520 
1521 #line 57 "fts5parse.sql"
1522 /**************** End of %include directives **********************************/
1523 /* These constants specify the various numeric values for terminal symbols.
1524 ***************** Begin token definitions *************************************/
/* Token codes emitted by the parser generator. They are skipped when
** FTS5_OR is already defined, which is the case in this file because the
** fts5parse.h section above defines the same names with the same values. */
#ifndef FTS5_OR
#define FTS5_OR                              1
#define FTS5_AND                             2
#define FTS5_NOT                             3
#define FTS5_TERM                            4
#define FTS5_COLON                           5
#define FTS5_MINUS                           6
#define FTS5_LCP                             7
#define FTS5_RCP                             8
#define FTS5_STRING                          9
#define FTS5_LP                             10
#define FTS5_RP                             11
#define FTS5_CARET                          12
#define FTS5_COMMA                          13
#define FTS5_PLUS                           14
#define FTS5_STAR                           15
#endif
1542 /**************** End token definitions ***************************************/
1543 
/* The next section is a series of control #defines that configure
** various aspects of the generated parser.
1546 **    fts5YYCODETYPE         is the data type used to store the integer codes
1547 **                       that represent terminal and non-terminal symbols.
1548 **                       "unsigned char" is used if there are fewer than
1549 **                       256 symbols.  Larger types otherwise.
1550 **    fts5YYNOCODE           is a number of type fts5YYCODETYPE that is not used for
1551 **                       any terminal or nonterminal symbol.
1552 **    fts5YYFALLBACK         If defined, this indicates that one or more tokens
1553 **                       (also known as: "terminal symbols") have fall-back
1554 **                       values which should be used if the original symbol
1555 **                       would not parse.  This permits keywords to sometimes
1556 **                       be used as identifiers, for example.
1557 **    fts5YYACTIONTYPE       is the data type used for "action codes" - numbers
1558 **                       that indicate what to do in response to the next
1559 **                       token.
1560 **    sqlite3Fts5ParserFTS5TOKENTYPE     is the data type used for minor type for terminal
**                       symbols.  Background: A "minor type" is a semantic
**                       value associated with a terminal or non-terminal
**                       symbol.  For example, for an "ID" terminal symbol,
**                       the minor type might be the name of the identifier.
**                       Each non-terminal can have a different minor type.
**                       Terminal symbols all have the same minor type, though.
**                       This macro defines the minor type for terminal
**                       symbols.
1569 **    fts5YYMINORTYPE        is the data type used for all minor types.
1570 **                       This is typically a union of many types, one of
1571 **                       which is sqlite3Fts5ParserFTS5TOKENTYPE.  The entry in the union
1572 **                       for terminal symbols is called "fts5yy0".
1573 **    fts5YYSTACKDEPTH       is the maximum depth of the parser's stack.  If
1574 **                       zero the stack is dynamically sized using realloc()
1575 **    sqlite3Fts5ParserARG_SDECL     A static variable declaration for the %extra_argument
1576 **    sqlite3Fts5ParserARG_PDECL     A parameter declaration for the %extra_argument
1577 **    sqlite3Fts5ParserARG_PARAM     Code to pass %extra_argument as a subroutine parameter
1578 **    sqlite3Fts5ParserARG_STORE     Code to store %extra_argument into fts5yypParser
1579 **    sqlite3Fts5ParserARG_FETCH     Code to extract %extra_argument from fts5yypParser
1580 **    sqlite3Fts5ParserCTX_*         As sqlite3Fts5ParserARG_ except for %extra_context
1581 **    fts5YYERRORSYMBOL      is the code number of the error symbol.  If not
1582 **                       defined, then do no error processing.
1583 **    fts5YYNSTATE           the combined number of states.
1584 **    fts5YYNRULE            the number of rules in the grammar
1585 **    fts5YYNFTS5TOKEN           Number of terminal symbols
1586 **    fts5YY_MAX_SHIFT       Maximum value for shift actions
1587 **    fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions
1588 **    fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions
1589 **    fts5YY_ERROR_ACTION    The fts5yy_action[] code for syntax error
1590 **    fts5YY_ACCEPT_ACTION   The fts5yy_action[] code for accept
1591 **    fts5YY_NO_ACTION       The fts5yy_action[] code for no-op
1592 **    fts5YY_MIN_REDUCE      Minimum value for reduce actions
1593 **    fts5YY_MAX_REDUCE      Maximum value for reduce actions
1594 */
#ifndef INTERFACE
# define INTERFACE 1
#endif
/************* Begin control #defines *****************************************/
/* Symbol codes and action codes both fit in one byte for this grammar:
** the largest symbol code is fts5YYNOCODE (27) and the largest action code
** is fts5YY_MAX_REDUCE (110). */
#define fts5YYCODETYPE unsigned char
#define fts5YYNOCODE 27
#define fts5YYACTIONTYPE unsigned char
#define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token
/* Union of every semantic ("minor") value type that can sit on the parser
** stack.  fts5yy0 carries terminal symbols; the pointer members are the
** ones released by fts5yy_destructor() for the non-terminals. */
typedef union {
  int fts5yyinit;
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0;
  int fts5yy4;
  Fts5Colset* fts5yy11;
  Fts5ExprNode* fts5yy24;
  Fts5ExprNearset* fts5yy46;
  Fts5ExprPhrase* fts5yy53;
} fts5YYMINORTYPE;
/* A positive depth selects the fixed-size stack array embedded in
** struct fts5yyParser; a value <=0 selects the heap-grown stack. */
#ifndef fts5YYSTACKDEPTH
#define fts5YYSTACKDEPTH 100
#endif
/* The %extra_argument is an Fts5Parse* stored in the parser object. */
#define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse;
#define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse
#define sqlite3Fts5ParserARG_PARAM ,pParse
#define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse=fts5yypParser->pParse;
#define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse=pParse;
/* No %extra_context is used, so all of the CTX macros expand to nothing. */
#define sqlite3Fts5ParserCTX_SDECL
#define sqlite3Fts5ParserCTX_PDECL
#define sqlite3Fts5ParserCTX_PARAM
#define sqlite3Fts5ParserCTX_FETCH
#define sqlite3Fts5ParserCTX_STORE
/* Sizes of the automaton and the numeric ranges that partition the
** action-code space (shift, shift-reduce, error, accept, no-op, reduce). */
#define fts5YYNSTATE             35
#define fts5YYNRULE              28
#define fts5YYNRULE_WITH_ACTION  28
#define fts5YYNFTS5TOKEN             16
#define fts5YY_MAX_SHIFT         34
#define fts5YY_MIN_SHIFTREDUCE   52
#define fts5YY_MAX_SHIFTREDUCE   79
#define fts5YY_ERROR_ACTION      80
#define fts5YY_ACCEPT_ACTION     81
#define fts5YY_NO_ACTION         82
#define fts5YY_MIN_REDUCE        83
#define fts5YY_MAX_REDUCE        110
/************* End control #defines *******************************************/
/* Number of entries in the fts5yy_lookahead[] table. */
#define fts5YY_NLOOKAHEAD ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])))
1639 
/* Define the fts5yytestcase() macro to be a no-op if it is not already
** defined.
**
** Applications can choose to define fts5yytestcase() in the %include section
** to a macro that can assist in verifying code coverage.  For production
** code the fts5yytestcase() macro should be turned off.  But it is useful
** for testing.
**
** In this file the %include section already defines fts5yytestcase(X) as
** testcase(X), so the fallback below is not used.
*/
#ifndef fts5yytestcase
# define fts5yytestcase(X)
#endif
1651 
1652 
1653 /* Next are the tables used to determine what action to take based on the
1654 ** current state and lookahead token.  These tables are used to implement
1655 ** functions that take a state number and lookahead value and return an
1656 ** action integer.
1657 **
1658 ** Suppose the action integer is N.  Then the action is determined as
1659 ** follows
1660 **
1661 **   0 <= N <= fts5YY_MAX_SHIFT             Shift N.  That is, push the lookahead
1662 **                                      token onto the stack and goto state N.
1663 **
1664 **   N between fts5YY_MIN_SHIFTREDUCE       Shift to an arbitrary state then
1665 **     and fts5YY_MAX_SHIFTREDUCE           reduce by rule N-fts5YY_MIN_SHIFTREDUCE.
1666 **
1667 **   N == fts5YY_ERROR_ACTION               A syntax error has occurred.
1668 **
1669 **   N == fts5YY_ACCEPT_ACTION              The parser accepts its input.
1670 **
1671 **   N == fts5YY_NO_ACTION                  No such action.  Denotes unused
1672 **                                      slots in the fts5yy_action[] table.
1673 **
1674 **   N between fts5YY_MIN_REDUCE            Reduce by rule N-fts5YY_MIN_REDUCE
1675 **     and fts5YY_MAX_REDUCE
1676 **
1677 ** The action table is constructed as a single large table named fts5yy_action[].
1678 ** Given state S and lookahead X, the action is computed as either:
1679 **
1680 **    (A)   N = fts5yy_action[ fts5yy_shift_ofst[S] + X ]
1681 **    (B)   N = fts5yy_default[S]
1682 **
1683 ** The (A) formula is preferred.  The B formula is used instead if
1684 ** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X.
1685 **
1686 ** The formulas above are for computing the action when the lookahead is
1687 ** a terminal symbol.  If the lookahead is a non-terminal (as occurs after
1688 ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of
1689 ** the fts5yy_shift_ofst[] array.
1690 **
1691 ** The following are the tables generated in this section:
1692 **
1693 **  fts5yy_action[]        A single table containing all actions.
1694 **  fts5yy_lookahead[]     A table containing the lookahead for each entry in
1695 **                     fts5yy_action.  Used to detect hash collisions.
1696 **  fts5yy_shift_ofst[]    For each state, the offset into fts5yy_action for
1697 **                     shifting terminals.
1698 **  fts5yy_reduce_ofst[]   For each state, the offset into fts5yy_action for
1699 **                     shifting non-terminals after a reduce.
1700 **  fts5yy_default[]       Default action for each state.
1701 **
1702 *********** Begin parsing tables **********************************************/
/* Number of entries in fts5yy_action[]. */
#define fts5YY_ACTTAB_COUNT (105)
/* Packed action table.  An entry at index (offset + symbol) is only valid
** when the entry at the same index of fts5yy_lookahead[] equals that symbol. */
static const fts5YYACTIONTYPE fts5yy_action[] = {
 /*     0 */    81,   20,   96,    6,   28,   99,   98,   26,   26,   18,
 /*    10 */    96,    6,   28,   17,   98,   56,   26,   19,   96,    6,
 /*    20 */    28,   14,   98,   14,   26,   31,   92,   96,    6,   28,
 /*    30 */   108,   98,   25,   26,   21,   96,    6,   28,   78,   98,
 /*    40 */    58,   26,   29,   96,    6,   28,  107,   98,   22,   26,
 /*    50 */    24,   16,   12,   11,    1,   13,   13,   24,   16,   23,
 /*    60 */    11,   33,   34,   13,   97,    8,   27,   32,   98,    7,
 /*    70 */    26,    3,    4,    5,    3,    4,    5,    3,   83,    4,
 /*    80 */     5,    3,   63,    5,    3,   62,   12,    2,   86,   13,
 /*    90 */     9,   30,   10,   10,   54,   57,   75,   78,   78,   53,
 /*   100 */    57,   15,   82,   82,   71,
};
/* Symbol expected at each slot of fts5yy_action[].  The table is longer
** than fts5yy_action[]: every entry from index 105 onward is fts5YYNOCODE
** (27), which never matches a real symbol. */
static const fts5YYCODETYPE fts5yy_lookahead[] = {
 /*     0 */    16,   17,   18,   19,   20,   22,   22,   24,   24,   17,
 /*    10 */    18,   19,   20,    7,   22,    9,   24,   17,   18,   19,
 /*    20 */    20,    9,   22,    9,   24,   13,   17,   18,   19,   20,
 /*    30 */    26,   22,   24,   24,   17,   18,   19,   20,   15,   22,
 /*    40 */     9,   24,   17,   18,   19,   20,   26,   22,   21,   24,
 /*    50 */     6,    7,    9,    9,   10,   12,   12,    6,    7,   21,
 /*    60 */     9,   24,   25,   12,   18,    5,   20,   14,   22,    5,
 /*    70 */    24,    3,    1,    2,    3,    1,    2,    3,    0,    1,
 /*    80 */     2,    3,   11,    2,    3,   11,    9,   10,    5,   12,
 /*    90 */    23,   24,   10,   10,    8,    9,    9,   15,   15,    8,
 /*   100 */     9,    9,   27,   27,   11,   27,   27,   27,   27,   27,
 /*   110 */    27,   27,   27,   27,   27,   27,   27,   27,   27,   27,
 /*   120 */    27,
};
/* Per-state base offset into fts5yy_action[] for terminal lookaheads.
** One entry for each of states 0..fts5YY_SHIFT_COUNT. */
#define fts5YY_SHIFT_COUNT    (34)
#define fts5YY_SHIFT_MIN      (0)
#define fts5YY_SHIFT_MAX      (93)
static const unsigned char fts5yy_shift_ofst[] = {
 /*     0 */    44,   44,   44,   44,   44,   44,   51,   77,   43,   12,
 /*    10 */    14,   83,   82,   14,   23,   23,   31,   31,   71,   74,
 /*    20 */    78,   81,   86,   91,    6,   53,   53,   60,   64,   68,
 /*    30 */    53,   87,   92,   53,   93,
};
/* Per-state base offset into fts5yy_action[] for non-terminal lookaheads.
** One entry for each of states 0..fts5YY_REDUCE_COUNT; offsets may be
** negative, hence the signed element type. */
#define fts5YY_REDUCE_COUNT (17)
#define fts5YY_REDUCE_MIN   (-17)
#define fts5YY_REDUCE_MAX   (67)
static const signed char fts5yy_reduce_ofst[] = {
 /*     0 */   -16,   -8,    0,    9,   17,   25,   46,  -17,  -17,   37,
 /*    10 */    67,    4,    4,    8,    4,   20,   27,   38,
};
/* Action used for a state when the packed tables hold no entry for the
** lookahead.  One entry per state; 80 is fts5YY_ERROR_ACTION. */
static const fts5YYACTIONTYPE fts5yy_default[] = {
 /*     0 */    80,   80,   80,   80,   80,   80,   95,   80,   80,  105,
 /*    10 */    80,  110,  110,   80,  110,  110,   80,   80,   80,   80,
 /*    20 */    80,   91,   80,   80,   80,  101,  100,   80,   80,   90,
 /*    30 */   103,   80,   80,  104,   80,
};
1754 /********** End of lemon-generated parsing tables *****************************/
1755 
/* The next table maps tokens (terminal symbols) into fallback tokens.
** If a construct like the following:
**
**      %fallback ID X Y Z.
**
** appears in the grammar, then ID becomes a fallback token for X, Y,
** and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
** but it does not parse, the type of the token is changed to ID and
** the parse is retried before an error is thrown.
**
** This feature can be used, for example, to cause some keywords in a language
** to revert to identifiers if the keyword does not apply in the context where
** it appears.
**
** The table below is empty and is only compiled when fts5YYFALLBACK is
** defined.
*/
#ifdef fts5YYFALLBACK
static const fts5YYCODETYPE fts5yyFallback[] = {
};
#endif /* fts5YYFALLBACK */
1774 
1775 /* The following structure represents a single element of the
1776 ** parser's stack.  Information stored includes:
1777 **
1778 **   +  The state number for the parser at this level of the stack.
1779 **
1780 **   +  The value of the token stored at this level of the stack.
1781 **      (In other words, the "major" token.)
1782 **
1783 **   +  The semantic value stored at this level of the stack.  This is
1784 **      the information used by the action routines in the grammar.
1785 **      It is sometimes called the "minor" token.
1786 **
1787 ** After the "shift" half of a SHIFTREDUCE action, the stateno field
1788 ** actually contains the reduce action for the second half of the
1789 ** SHIFTREDUCE.
1790 */
/* One slot of the parser stack: automaton state, symbol code, and the
** semantic value attached to that symbol. */
struct fts5yyStackEntry {
  fts5YYACTIONTYPE stateno;  /* The state-number, or reduce action in SHIFTREDUCE */
  fts5YYCODETYPE major;      /* The major token value.  This is the code
                         ** number for the token at this stack level */
  fts5YYMINORTYPE minor;     /* The user-supplied minor token value.  This
                         ** is the value of the token  */
};
typedef struct fts5yyStackEntry fts5yyStackEntry;
1799 
1800 /* The state of the parser is completely contained in an instance of
1801 ** the following structure */
/* Parser object.  Depending on fts5YYSTACKDEPTH the stack is either an
** array embedded in this structure (depth > 0) or a separately allocated
** buffer that can grow (depth <= 0). */
struct fts5yyParser {
  fts5yyStackEntry *fts5yytos;          /* Pointer to top element of the stack */
#ifdef fts5YYTRACKMAXSTACKDEPTH
  int fts5yyhwm;                    /* High-water mark of the stack */
#endif
#ifndef fts5YYNOERRORRECOVERY
  int fts5yyerrcnt;                 /* Shifts left before out of the error */
#endif
  sqlite3Fts5ParserARG_SDECL                /* A place to hold %extra_argument */
  sqlite3Fts5ParserCTX_SDECL                /* A place to hold %extra_context */
#if fts5YYSTACKDEPTH<=0
  int fts5yystksz;                  /* Current size of the stack, in entries */
  fts5yyStackEntry *fts5yystack;        /* The parser's stack */
  fts5yyStackEntry fts5yystk0;          /* First stack entry; used as a one-entry
                                    ** stack if the initial allocation fails */
#else
  fts5yyStackEntry fts5yystack[fts5YYSTACKDEPTH];  /* The parser's stack */
  fts5yyStackEntry *fts5yystackEnd;            /* Last entry in the stack */
#endif
};
typedef struct fts5yyParser fts5yyParser;
1822 
#include <assert.h>
#ifndef NDEBUG
#include <stdio.h>
/* Trace destination and line prefix.  Both are zero while tracing is off. */
static FILE *fts5yyTraceFILE = 0;
static char *fts5yyTracePrompt = 0;

/*
** Enable or disable parser tracing.
**
** TraceFILE is the stream that receives trace output and zTracePrompt is
** the text written at the start of every trace line.  Tracing is active
** only when both are non-NULL; passing NULL for either one switches
** tracing off and clears both saved values.
**
** There is no return value.
*/
static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){
  if( TraceFILE==0 || zTracePrompt==0 ){
    fts5yyTraceFILE = 0;
    fts5yyTracePrompt = 0;
  }else{
    fts5yyTraceFILE = TraceFILE;
    fts5yyTracePrompt = zTracePrompt;
  }
}
#endif /* NDEBUG */
1855 
#if defined(fts5YYCOVERAGE) || !defined(NDEBUG)
/* For tracing shifts, the names of all terminals and nonterminals
** are required.  The following table supplies these names.  It is indexed
** by symbol code: entries 1..15 are the terminals and entries 16..26 are
** the non-terminals. */
static const char *const fts5yyTokenName[] = {
  /*    0 */ "$",
  /*    1 */ "OR",
  /*    2 */ "AND",
  /*    3 */ "NOT",
  /*    4 */ "TERM",
  /*    5 */ "COLON",
  /*    6 */ "MINUS",
  /*    7 */ "LCP",
  /*    8 */ "RCP",
  /*    9 */ "STRING",
  /*   10 */ "LP",
  /*   11 */ "RP",
  /*   12 */ "CARET",
  /*   13 */ "COMMA",
  /*   14 */ "PLUS",
  /*   15 */ "STAR",
  /*   16 */ "input",
  /*   17 */ "expr",
  /*   18 */ "cnearset",
  /*   19 */ "exprlist",
  /*   20 */ "colset",
  /*   21 */ "colsetlist",
  /*   22 */ "nearset",
  /*   23 */ "nearphrases",
  /*   24 */ "phrase",
  /*   25 */ "neardist_opt",
  /*   26 */ "star_opt",
};
#endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */
1889 
#ifndef NDEBUG
/* For tracing reduce actions, the names of all rules are required.
** The table holds one string per grammar rule (fts5YYNRULE entries),
** indexed by rule number, and is compiled only when NDEBUG is not defined.
*/
static const char *const fts5yyRuleName[] = {
 /*   0 */ "input ::= expr",
 /*   1 */ "colset ::= MINUS LCP colsetlist RCP",
 /*   2 */ "colset ::= LCP colsetlist RCP",
 /*   3 */ "colset ::= STRING",
 /*   4 */ "colset ::= MINUS STRING",
 /*   5 */ "colsetlist ::= colsetlist STRING",
 /*   6 */ "colsetlist ::= STRING",
 /*   7 */ "expr ::= expr AND expr",
 /*   8 */ "expr ::= expr OR expr",
 /*   9 */ "expr ::= expr NOT expr",
 /*  10 */ "expr ::= colset COLON LP expr RP",
 /*  11 */ "expr ::= LP expr RP",
 /*  12 */ "expr ::= exprlist",
 /*  13 */ "exprlist ::= cnearset",
 /*  14 */ "exprlist ::= exprlist cnearset",
 /*  15 */ "cnearset ::= nearset",
 /*  16 */ "cnearset ::= colset COLON nearset",
 /*  17 */ "nearset ::= phrase",
 /*  18 */ "nearset ::= CARET phrase",
 /*  19 */ "nearset ::= STRING LP nearphrases neardist_opt RP",
 /*  20 */ "nearphrases ::= phrase",
 /*  21 */ "nearphrases ::= nearphrases phrase",
 /*  22 */ "neardist_opt ::=",
 /*  23 */ "neardist_opt ::= COMMA STRING",
 /*  24 */ "phrase ::= phrase PLUS STRING star_opt",
 /*  25 */ "phrase ::= STRING star_opt",
 /*  26 */ "star_opt ::= STAR",
 /*  27 */ "star_opt ::=",
};
#endif /* NDEBUG */
1924 
1925 
1926 #if fts5YYSTACKDEPTH<=0
1927 /*
1928 ** Try to increase the size of the parser stack.  Return the number
1929 ** of errors.  Return 0 on success.
1930 */
1931 static int fts5yyGrowStack(fts5yyParser *p){
1932   int newSize;
1933   int idx;
1934   fts5yyStackEntry *pNew;
1935 
1936   newSize = p->fts5yystksz*2 + 100;
1937   idx = p->fts5yytos ? (int)(p->fts5yytos - p->fts5yystack) : 0;
1938   if( p->fts5yystack==&p->fts5yystk0 ){
1939     pNew = malloc(newSize*sizeof(pNew[0]));
1940     if( pNew ) pNew[0] = p->fts5yystk0;
1941   }else{
1942     pNew = realloc(p->fts5yystack, newSize*sizeof(pNew[0]));
1943   }
1944   if( pNew ){
1945     p->fts5yystack = pNew;
1946     p->fts5yytos = &p->fts5yystack[idx];
1947 #ifndef NDEBUG
1948     if( fts5yyTraceFILE ){
1949       fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n",
1950               fts5yyTracePrompt, p->fts5yystksz, newSize);
1951     }
1952 #endif
1953     p->fts5yystksz = newSize;
1954   }
1955   return pNew==0;
1956 }
1957 #endif
1958 
/* Datatype of the argument taken by the memory allocator callback that is
** passed to sqlite3Fts5ParserAlloc() below.  This can be changed by
** putting an appropriate #define in the %include section of the input
** grammar.  In this file the %include section already defines it as u64,
** so the size_t default below is not used.
*/
#ifndef fts5YYMALLOCARGTYPE
# define fts5YYMALLOCARGTYPE size_t
#endif
1967 
1968 /* Initialize a new parser that has already been allocated.
1969 */
1970 static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){
1971   fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser;
1972   sqlite3Fts5ParserCTX_STORE
1973 #ifdef fts5YYTRACKMAXSTACKDEPTH
1974   fts5yypParser->fts5yyhwm = 0;
1975 #endif
1976 #if fts5YYSTACKDEPTH<=0
1977   fts5yypParser->fts5yytos = NULL;
1978   fts5yypParser->fts5yystack = NULL;
1979   fts5yypParser->fts5yystksz = 0;
1980   if( fts5yyGrowStack(fts5yypParser) ){
1981     fts5yypParser->fts5yystack = &fts5yypParser->fts5yystk0;
1982     fts5yypParser->fts5yystksz = 1;
1983   }
1984 #endif
1985 #ifndef fts5YYNOERRORRECOVERY
1986   fts5yypParser->fts5yyerrcnt = -1;
1987 #endif
1988   fts5yypParser->fts5yytos = fts5yypParser->fts5yystack;
1989   fts5yypParser->fts5yystack[0].stateno = 0;
1990   fts5yypParser->fts5yystack[0].major = 0;
1991 #if fts5YYSTACKDEPTH>0
1992   fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH-1];
1993 #endif
1994 }
1995 
1996 #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
1997 /*
1998 ** This function allocates a new parser.
1999 ** The only argument is a pointer to a function which works like
2000 ** malloc.
2001 **
2002 ** Inputs:
2003 ** A pointer to the function used to allocate memory.
2004 **
2005 ** Outputs:
2006 ** A pointer to a parser.  This pointer is used in subsequent calls
2007 ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree.
2008 */
2009 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE) sqlite3Fts5ParserCTX_PDECL){
2010   fts5yyParser *fts5yypParser;
2011   fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPE)sizeof(fts5yyParser) );
2012   if( fts5yypParser ){
2013     sqlite3Fts5ParserCTX_STORE
2014     sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM);
2015   }
2016   return (void*)fts5yypParser;
2017 }
2018 #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2019 
2020 
/* The following function deletes the "minor type" or semantic value
** associated with a symbol.  The symbol can be either a terminal
** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is
** a pointer to the value to be deleted.  The code used to do the
** deletions is derived from the %destructor and/or %token_destructor
** directives of the input grammar.
**
** Only the non-terminals with codes 16 through 24 have a case below; every
** other symbol code falls through to the default case, which does nothing.
*/
static void fts5yy_destructor(
  fts5yyParser *fts5yypParser,    /* The parser */
  fts5YYCODETYPE fts5yymajor,     /* Type code for object to destroy */
  fts5YYMINORTYPE *fts5yypminor   /* The object to be destroyed */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
  switch( fts5yymajor ){
    /* Here is inserted the actions which take place when a
    ** terminal or non-terminal is destroyed.  This can happen
    ** when the symbol is popped from the stack during a
    ** reduce or during error processing or when a parser is
    ** being destroyed before it is finished parsing.
    **
    ** Note: during a reduce, the only symbols destroyed are those
    ** which appear on the RHS of the rule, but which are *not* used
    ** inside the C code.
    */
/********* Begin destructor definitions ***************************************/
    /* Nothing to release; the cast only marks pParse as used. */
    case 16: /* input */
{
#line 83 "fts5parse.y"
 (void)pParse;
#line 586 "fts5parse.sql"
}
      break;
    /* Fts5ExprNode* values */
    case 17: /* expr */
    case 18: /* cnearset */
    case 19: /* exprlist */
{
#line 89 "fts5parse.y"
 sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24));
#line 595 "fts5parse.sql"
}
      break;
    /* Fts5Colset* values, released with sqlite3_free() */
    case 20: /* colset */
    case 21: /* colsetlist */
{
#line 93 "fts5parse.y"
 sqlite3_free((fts5yypminor->fts5yy11));
#line 603 "fts5parse.sql"
}
      break;
    /* Fts5ExprNearset* values */
    case 22: /* nearset */
    case 23: /* nearphrases */
{
#line 148 "fts5parse.y"
 sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46));
#line 611 "fts5parse.sql"
}
      break;
    /* Fts5ExprPhrase* values */
    case 24: /* phrase */
{
#line 183 "fts5parse.y"
 sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53));
#line 618 "fts5parse.sql"
}
      break;
/********* End destructor definitions *****************************************/
    default:  break;   /* If no destructor action specified: do nothing */
  }
}
2090 
2091 /*
2092 ** Pop the parser's stack once.
2093 **
2094 ** If there is a destructor routine associated with the token which
2095 ** is popped from the stack, then call it.
2096 */
2097 static void fts5yy_pop_parser_stack(fts5yyParser *pParser){
2098   fts5yyStackEntry *fts5yytos;
2099   assert( pParser->fts5yytos!=0 );
2100   assert( pParser->fts5yytos > pParser->fts5yystack );
2101   fts5yytos = pParser->fts5yytos--;
2102 #ifndef NDEBUG
2103   if( fts5yyTraceFILE ){
2104     fprintf(fts5yyTraceFILE,"%sPopping %s\n",
2105       fts5yyTracePrompt,
2106       fts5yyTokenName[fts5yytos->major]);
2107   }
2108 #endif
2109   fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor);
2110 }
2111 
2112 /*
2113 ** Clear all secondary memory allocations from the parser
2114 */
2115 static void sqlite3Fts5ParserFinalize(void *p){
2116   fts5yyParser *pParser = (fts5yyParser*)p;
2117   while( pParser->fts5yytos>pParser->fts5yystack ) fts5yy_pop_parser_stack(pParser);
2118 #if fts5YYSTACKDEPTH<=0
2119   if( pParser->fts5yystack!=&pParser->fts5yystk0 ) free(pParser->fts5yystack);
2120 #endif
2121 }
2122 
#ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK
/*
** Destroy a parser: finalize it (which runs the destructors for all
** remaining stack entries) and then hand the object to freeProc.
**
** Unless fts5YYPARSEFREENEVERNULL is defined, a NULL p is accepted and
** ignored.
**
** NOTE(review): the %include section of this file defines
** fts5YYPARSEFREENOTNULL rather than fts5YYPARSEFREENEVERNULL, so the
** NULL check below is compiled in.  Confirm which spelling is intended.
*/
static void sqlite3Fts5ParserFree(
  void *p,                    /* Parser object to destroy */
  void (*freeProc)(void*)     /* Routine that releases the parser's memory */
){
#ifndef fts5YYPARSEFREENEVERNULL
  if( p!=0 )
#endif
  {
    sqlite3Fts5ParserFinalize(p);
    freeProc(p);
  }
}
#endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */
2143 
2144 /*
2145 ** Return the peak depth of the stack for a parser.
2146 */
2147 #ifdef fts5YYTRACKMAXSTACKDEPTH
2148 static int sqlite3Fts5ParserStackPeak(void *p){
2149   fts5yyParser *pParser = (fts5yyParser*)p;
2150   return pParser->fts5yyhwm;
2151 }
2152 #endif
2153 
/* This array of booleans keeps track of the parser statement
** coverage.  The element fts5yycoverage[X][Y] is set when the parser
** is in state X and has a lookahead token Y.  In a well-tested
** system, every element of this matrix should end up being set.
** The array is compiled only when fts5YYCOVERAGE is defined.
*/
#if defined(fts5YYCOVERAGE)
static unsigned char fts5yycoverage[fts5YYNSTATE][fts5YYNFTS5TOKEN];
#endif
2162 
2163 /*
2164 ** Write into out a description of every state/lookahead combination that
2165 **
2166 **   (1)  has not been used by the parser, and
2167 **   (2)  is not a syntax error.
2168 **
2169 ** Return the number of missed state/lookahead combinations.
2170 */
2171 #if defined(fts5YYCOVERAGE)
2172 static int sqlite3Fts5ParserCoverage(FILE *out){
2173   int stateno, iLookAhead, i;
2174   int nMissed = 0;
2175   for(stateno=0; stateno<fts5YYNSTATE; stateno++){
2176     i = fts5yy_shift_ofst[stateno];
2177     for(iLookAhead=0; iLookAhead<fts5YYNFTS5TOKEN; iLookAhead++){
2178       if( fts5yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
2179       if( fts5yycoverage[stateno][iLookAhead]==0 ) nMissed++;
2180       if( out ){
2181         fprintf(out,"State %d lookahead %s %s\n", stateno,
2182                 fts5yyTokenName[iLookAhead],
2183                 fts5yycoverage[stateno][iLookAhead] ? "ok" : "missed");
2184       }
2185     }
2186   }
2187   return nMissed;
2188 }
2189 #endif
2190 
2191 /*
2192 ** Find the appropriate action for a parser given the terminal
2193 ** look-ahead token iLookAhead.
2194 */
2195 static fts5YYACTIONTYPE fts5yy_find_shift_action(
2196   fts5YYCODETYPE iLookAhead,    /* The look-ahead token */
2197   fts5YYACTIONTYPE stateno      /* Current state number */
2198 ){
2199   int i;
2200 
2201   if( stateno>fts5YY_MAX_SHIFT ) return stateno;
2202   assert( stateno <= fts5YY_SHIFT_COUNT );
2203 #if defined(fts5YYCOVERAGE)
2204   fts5yycoverage[stateno][iLookAhead] = 1;
2205 #endif
2206   do{
2207     i = fts5yy_shift_ofst[stateno];
2208     assert( i>=0 );
2209     assert( i<=fts5YY_ACTTAB_COUNT );
2210     assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD );
2211     assert( iLookAhead!=fts5YYNOCODE );
2212     assert( iLookAhead < fts5YYNFTS5TOKEN );
2213     i += iLookAhead;
2214     assert( i<(int)fts5YY_NLOOKAHEAD );
2215     if( fts5yy_lookahead[i]!=iLookAhead ){
2216 #ifdef fts5YYFALLBACK
2217       fts5YYCODETYPE iFallback;            /* Fallback token */
2218       assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) );
2219       iFallback = fts5yyFallback[iLookAhead];
2220       if( iFallback!=0 ){
2221 #ifndef NDEBUG
2222         if( fts5yyTraceFILE ){
2223           fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n",
2224              fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]);
2225         }
2226 #endif
2227         assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate */
2228         iLookAhead = iFallback;
2229         continue;
2230       }
2231 #endif
2232 #ifdef fts5YYWILDCARD
2233       {
2234         int j = i - iLookAhead + fts5YYWILDCARD;
2235         assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) );
2236         if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){
2237 #ifndef NDEBUG
2238           if( fts5yyTraceFILE ){
2239             fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n",
2240                fts5yyTracePrompt, fts5yyTokenName[iLookAhead],
2241                fts5yyTokenName[fts5YYWILDCARD]);
2242           }
2243 #endif /* NDEBUG */
2244           return fts5yy_action[j];
2245         }
2246       }
2247 #endif /* fts5YYWILDCARD */
2248       return fts5yy_default[stateno];
2249     }else{
2250       assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) );
2251       return fts5yy_action[i];
2252     }
2253   }while(1);
2254 }
2255 
2256 /*
2257 ** Find the appropriate action for a parser given the non-terminal
2258 ** look-ahead token iLookAhead.
2259 */
2260 static fts5YYACTIONTYPE fts5yy_find_reduce_action(
2261   fts5YYACTIONTYPE stateno,     /* Current state number */
2262   fts5YYCODETYPE iLookAhead     /* The look-ahead token */
2263 ){
2264   int i;
2265 #ifdef fts5YYERRORSYMBOL
2266   if( stateno>fts5YY_REDUCE_COUNT ){
2267     return fts5yy_default[stateno];
2268   }
2269 #else
2270   assert( stateno<=fts5YY_REDUCE_COUNT );
2271 #endif
2272   i = fts5yy_reduce_ofst[stateno];
2273   assert( iLookAhead!=fts5YYNOCODE );
2274   i += iLookAhead;
2275 #ifdef fts5YYERRORSYMBOL
2276   if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){
2277     return fts5yy_default[stateno];
2278   }
2279 #else
2280   assert( i>=0 && i<fts5YY_ACTTAB_COUNT );
2281   assert( fts5yy_lookahead[i]==iLookAhead );
2282 #endif
2283   return fts5yy_action[i];
2284 }
2285 
2286 /*
2287 ** The following routine is called if the stack overflows.
2288 */
2289 static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){
2290    sqlite3Fts5ParserARG_FETCH
2291    sqlite3Fts5ParserCTX_FETCH
2292 #ifndef NDEBUG
2293    if( fts5yyTraceFILE ){
2294      fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt);
2295    }
2296 #endif
2297    while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
2298    /* Here code is inserted which will execute if the parser
2299    ** stack every overflows */
2300 /******** Begin %stack_overflow code ******************************************/
2301 #line 36 "fts5parse.y"
2302 
2303   sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow");
2304 #line 839 "fts5parse.sql"
2305 /******** End %stack_overflow code ********************************************/
2306    sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument var */
2307    sqlite3Fts5ParserCTX_STORE
2308 }
2309 
2310 /*
2311 ** Print tracing information for a SHIFT action
2312 */
2313 #ifndef NDEBUG
2314 static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){
2315   if( fts5yyTraceFILE ){
2316     if( fts5yyNewState<fts5YYNSTATE ){
2317       fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n",
2318          fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
2319          fts5yyNewState);
2320     }else{
2321       fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n",
2322          fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major],
2323          fts5yyNewState - fts5YY_MIN_REDUCE);
2324     }
2325   }
2326 }
2327 #else
2328 # define fts5yyTraceShift(X,Y,Z)
2329 #endif
2330 
2331 /*
2332 ** Perform a shift action.
2333 */
2334 static void fts5yy_shift(
2335   fts5yyParser *fts5yypParser,          /* The parser to be shifted */
2336   fts5YYACTIONTYPE fts5yyNewState,      /* The new state to shift in */
2337   fts5YYCODETYPE fts5yyMajor,           /* The major token to shift in */
2338   sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor        /* The minor token to shift in */
2339 ){
2340   fts5yyStackEntry *fts5yytos;
2341   fts5yypParser->fts5yytos++;
2342 #ifdef fts5YYTRACKMAXSTACKDEPTH
2343   if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
2344     fts5yypParser->fts5yyhwm++;
2345     assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) );
2346   }
2347 #endif
2348 #if fts5YYSTACKDEPTH>0
2349   if( fts5yypParser->fts5yytos>fts5yypParser->fts5yystackEnd ){
2350     fts5yypParser->fts5yytos--;
2351     fts5yyStackOverflow(fts5yypParser);
2352     return;
2353   }
2354 #else
2355   if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz] ){
2356     if( fts5yyGrowStack(fts5yypParser) ){
2357       fts5yypParser->fts5yytos--;
2358       fts5yyStackOverflow(fts5yypParser);
2359       return;
2360     }
2361   }
2362 #endif
2363   if( fts5yyNewState > fts5YY_MAX_SHIFT ){
2364     fts5yyNewState += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE;
2365   }
2366   fts5yytos = fts5yypParser->fts5yytos;
2367   fts5yytos->stateno = fts5yyNewState;
2368   fts5yytos->major = fts5yyMajor;
2369   fts5yytos->minor.fts5yy0 = fts5yyMinor;
2370   fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift");
2371 }
2372 
2373 /* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side
2374 ** of that rule */
2375 static const fts5YYCODETYPE fts5yyRuleInfoLhs[] = {
2376     16,  /* (0) input ::= expr */
2377     20,  /* (1) colset ::= MINUS LCP colsetlist RCP */
2378     20,  /* (2) colset ::= LCP colsetlist RCP */
2379     20,  /* (3) colset ::= STRING */
2380     20,  /* (4) colset ::= MINUS STRING */
2381     21,  /* (5) colsetlist ::= colsetlist STRING */
2382     21,  /* (6) colsetlist ::= STRING */
2383     17,  /* (7) expr ::= expr AND expr */
2384     17,  /* (8) expr ::= expr OR expr */
2385     17,  /* (9) expr ::= expr NOT expr */
2386     17,  /* (10) expr ::= colset COLON LP expr RP */
2387     17,  /* (11) expr ::= LP expr RP */
2388     17,  /* (12) expr ::= exprlist */
2389     19,  /* (13) exprlist ::= cnearset */
2390     19,  /* (14) exprlist ::= exprlist cnearset */
2391     18,  /* (15) cnearset ::= nearset */
2392     18,  /* (16) cnearset ::= colset COLON nearset */
2393     22,  /* (17) nearset ::= phrase */
2394     22,  /* (18) nearset ::= CARET phrase */
2395     22,  /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */
2396     23,  /* (20) nearphrases ::= phrase */
2397     23,  /* (21) nearphrases ::= nearphrases phrase */
2398     25,  /* (22) neardist_opt ::= */
2399     25,  /* (23) neardist_opt ::= COMMA STRING */
2400     24,  /* (24) phrase ::= phrase PLUS STRING star_opt */
2401     24,  /* (25) phrase ::= STRING star_opt */
2402     26,  /* (26) star_opt ::= STAR */
2403     26,  /* (27) star_opt ::= */
2404 };
2405 
/* fts5yyRuleInfoNRhs[J] is the negative of the number of symbols on the
** right-hand side of grammar rule J (0 for an empty rule).  The 28
** entries are laid out one row per left-hand-side symbol, in rule order. */
static const signed char fts5yyRuleInfoNRhs[] = {
  /* rules (0)       input        */  -1,
  /* rules (1)-(4)   colset       */  -4, -3, -1, -2,
  /* rules (5)-(6)   colsetlist   */  -2, -1,
  /* rules (7)-(12)  expr         */  -3, -3, -3, -5, -3, -1,
  /* rules (13)-(14) exprlist     */  -1, -2,
  /* rules (15)-(16) cnearset     */  -1, -3,
  /* rules (17)-(19) nearset      */  -1, -2, -5,
  /* rules (20)-(21) nearphrases  */  -1, -2,
  /* rules (22)-(23) neardist_opt */   0, -2,
  /* rules (24)-(25) phrase       */  -4, -2,
  /* rules (26)-(27) star_opt     */  -1,  0,
};
2438 
static void fts5yy_accept(fts5yyParser*);  /* Forward Declaration */

/*
** Reduce by grammar rule fts5yyruleno and perform the shift of the rule's
** left-hand-side (LHS) non-terminal that must immediately follow.
**
** The rule's action code runs first and stores the LHS value in the stack
** slot that the LHS will occupy.  The stack is then popped back by the
** number of right-hand-side (RHS) symbols, and the LHS is installed as the
** new top entry with the state found by fts5yy_find_reduce_action().
**
** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions
** access to the lookahead token (if any).  The fts5yyLookahead will be fts5YYNOCODE
** if the lookahead token has already been consumed.  As this procedure is
** only called from one place, optimizing compilers will in-line it, which
** means that the extra parameters have no performance impact.
**
** Returns the action/state that was stored in the new top-of-stack entry.
*/
static fts5YYACTIONTYPE fts5yy_reduce(
  fts5yyParser *fts5yypParser,         /* The parser */
  unsigned int fts5yyruleno,       /* Number of the rule by which to reduce */
  int fts5yyLookahead,             /* Lookahead token, or fts5YYNOCODE if none */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyLookaheadToken  /* Value of the lookahead token */
  sqlite3Fts5ParserCTX_PDECL                   /* %extra_context */
){
  int fts5yygoto;                     /* The next state */
  fts5YYACTIONTYPE fts5yyact;             /* The next action */
  fts5yyStackEntry *fts5yymsp;            /* The top of the parser's stack */
  int fts5yysize;                     /* Amount to pop the stack */
  sqlite3Fts5ParserARG_FETCH
  /* None of the rule actions below reads the lookahead; the casts
  ** silence unused-parameter warnings. */
  (void)fts5yyLookahead;
  (void)fts5yyLookaheadToken;
  /* fts5yymsp[0] is the last RHS symbol of the rule; fts5yymsp[-k] is the
  ** symbol k positions before it. */
  fts5yymsp = fts5yypParser->fts5yytos;

  switch( fts5yyruleno ){
  /* Beginning here are the reduction cases.  A typical example
  ** follows:
  **   case 0:
  **  #line <lineno> <grammarfile>
  **     { ... }           // User supplied code
  **  #line <lineno> <thisfile>
  **     break;
  */
/********** Begin reduce actions **********************************************/
        /* Scratch value: several actions build the LHS here and copy it
        ** into its stack slot only after the RHS slots have been read. */
        fts5YYMINORTYPE fts5yylhsminor;
      case 0: /* input ::= expr */
#line 82 "fts5parse.y"
{ sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); }
#line 1016 "fts5parse.sql"
        break;
      case 1: /* colset ::= MINUS LCP colsetlist RCP */
#line 97 "fts5parse.y"
{
    fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
}
#line 1023 "fts5parse.sql"
        break;
      case 2: /* colset ::= LCP colsetlist RCP */
#line 100 "fts5parse.y"
{ fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; }
#line 1028 "fts5parse.sql"
        break;
      case 3: /* colset ::= STRING */
#line 101 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
}
#line 1035 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 4: /* colset ::= MINUS STRING */
#line 104 "fts5parse.y"
{
  fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
  fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
}
#line 1044 "fts5parse.sql"
        break;
      case 5: /* colsetlist ::= colsetlist STRING */
#line 109 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); }
#line 1050 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 6: /* colsetlist ::= STRING */
#line 111 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
}
#line 1058 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 7: /* expr ::= expr AND expr */
#line 115 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1066 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 8: /* expr ::= expr OR expr */
#line 118 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1074 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 9: /* expr ::= expr NOT expr */
#line 121 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1082 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 10: /* expr ::= colset COLON LP expr RP */
#line 125 "fts5parse.y"
{
  sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11);
  fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;
}
#line 1091 "fts5parse.sql"
  fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 11: /* expr ::= LP expr RP */
#line 129 "fts5parse.y"
{fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;}
#line 1097 "fts5parse.sql"
        break;
      case 12: /* expr ::= exprlist */
      case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13);
#line 130 "fts5parse.y"
{fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;}
#line 1103 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 14: /* exprlist ::= exprlist cnearset */
#line 133 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24);
}
#line 1111 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 15: /* cnearset ::= nearset */
#line 137 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
}
#line 1119 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 16: /* cnearset ::= colset COLON nearset */
#line 140 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
  sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11);
}
#line 1128 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 17: /* nearset ::= phrase */
#line 151 "fts5parse.y"
{ fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); }
#line 1134 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 18: /* nearset ::= CARET phrase */
#line 152 "fts5parse.y"
{
  sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53);
  fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
}
#line 1143 "fts5parse.sql"
        break;
      case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */
#line 156 "fts5parse.y"
{
  sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0);
  sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0);
  fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46;
}
#line 1152 "fts5parse.sql"
  fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 20: /* nearphrases ::= phrase */
#line 162 "fts5parse.y"
{
  fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
}
#line 1160 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 21: /* nearphrases ::= nearphrases phrase */
#line 165 "fts5parse.y"
{
  fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53);
}
#line 1168 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      /* Empty rule: the LHS is written to fts5yymsp[1], the slot just
      ** above the current top.  sqlite3Fts5Parser() checks that this
      ** slot exists before reducing by a rule with an empty RHS. */
      case 22: /* neardist_opt ::= */
#line 172 "fts5parse.y"
{ fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; }
#line 1174 "fts5parse.sql"
        break;
      case 23: /* neardist_opt ::= COMMA STRING */
#line 173 "fts5parse.y"
{ fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; }
#line 1179 "fts5parse.sql"
        break;
      case 24: /* phrase ::= phrase PLUS STRING star_opt */
#line 185 "fts5parse.y"
{
  fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
}
#line 1186 "fts5parse.sql"
  fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
        break;
      case 25: /* phrase ::= STRING star_opt */
#line 188 "fts5parse.y"
{
  fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
}
#line 1194 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
        break;
      case 26: /* star_opt ::= STAR */
#line 196 "fts5parse.y"
{ fts5yymsp[0].minor.fts5yy4 = 1; }
#line 1200 "fts5parse.sql"
        break;
      /* Empty rule: as for rule 22, the LHS goes into fts5yymsp[1]. */
      case 27: /* star_opt ::= */
#line 197 "fts5parse.y"
{ fts5yymsp[1].minor.fts5yy4 = 0; }
#line 1205 "fts5parse.sql"
        break;
      default:
        break;
/********** End reduce actions ************************************************/
  };
  assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) );
  fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno];
  /* fts5yysize is zero or negative (minus the RHS length), so
  ** fts5yymsp[fts5yysize] is the entry just beneath the rule's RHS. */
  fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
  fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPE)fts5yygoto);

  /* There are no SHIFTREDUCE actions on nonterminals because the table
  ** generator has simplified them to pure REDUCE actions. */
  assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) );

  /* It is not possible for a REDUCE to be followed by an error */
  assert( fts5yyact!=fts5YY_ERROR_ACTION );

  /* Pop the RHS and make the slot of its first symbol (or, for an empty
  ** rule, the slot above the old top) the new top of stack.  Only the
  ** state and major code are set here; the minor value was already
  ** stored in this slot by the rule action above. */
  fts5yymsp += fts5yysize+1;
  fts5yypParser->fts5yytos = fts5yymsp;
  fts5yymsp->stateno = (fts5YYACTIONTYPE)fts5yyact;
  fts5yymsp->major = (fts5YYCODETYPE)fts5yygoto;
  fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift");
  return fts5yyact;
}
2695 
2696 /*
2697 ** The following code executes when the parse fails
2698 */
2699 #ifndef fts5YYNOERRORRECOVERY
2700 static void fts5yy_parse_failed(
2701   fts5yyParser *fts5yypParser           /* The parser */
2702 ){
2703   sqlite3Fts5ParserARG_FETCH
2704   sqlite3Fts5ParserCTX_FETCH
2705 #ifndef NDEBUG
2706   if( fts5yyTraceFILE ){
2707     fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt);
2708   }
2709 #endif
2710   while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
2711   /* Here code is inserted which will be executed whenever the
2712   ** parser fails */
2713 /************ Begin %parse_failure code ***************************************/
2714 /************ End %parse_failure code *****************************************/
2715   sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
2716   sqlite3Fts5ParserCTX_STORE
2717 }
2718 #endif /* fts5YYNOERRORRECOVERY */
2719 
2720 /*
2721 ** The following code executes when a syntax error first occurs.
2722 */
2723 static void fts5yy_syntax_error(
2724   fts5yyParser *fts5yypParser,           /* The parser */
2725   int fts5yymajor,                   /* The major type of the error token */
2726   sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor         /* The minor type of the error token */
2727 ){
2728   sqlite3Fts5ParserARG_FETCH
2729   sqlite3Fts5ParserCTX_FETCH
2730 #define FTS5TOKEN fts5yyminor
2731 /************ Begin %syntax_error code ****************************************/
2732 #line 30 "fts5parse.y"
2733 
2734   UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */
2735   sqlite3Fts5ParseError(
2736     pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKEN.n,FTS5TOKEN.p
2737   );
2738 #line 1273 "fts5parse.sql"
2739 /************ End %syntax_error code ******************************************/
2740   sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
2741   sqlite3Fts5ParserCTX_STORE
2742 }
2743 
2744 /*
2745 ** The following is executed when the parser accepts
2746 */
2747 static void fts5yy_accept(
2748   fts5yyParser *fts5yypParser           /* The parser */
2749 ){
2750   sqlite3Fts5ParserARG_FETCH
2751   sqlite3Fts5ParserCTX_FETCH
2752 #ifndef NDEBUG
2753   if( fts5yyTraceFILE ){
2754     fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt);
2755   }
2756 #endif
2757 #ifndef fts5YYNOERRORRECOVERY
2758   fts5yypParser->fts5yyerrcnt = -1;
2759 #endif
2760   assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack );
2761   /* Here code is inserted which will be executed whenever the
2762   ** parser accepts */
2763 /*********** Begin %parse_accept code *****************************************/
2764 /*********** End %parse_accept code *******************************************/
2765   sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
2766   sqlite3Fts5ParserCTX_STORE
2767 }
2768 
/* The main parser program.
** The first argument is a pointer to a structure obtained from
** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
** The second argument is the major token number.  The third is
** the minor token.  The fourth optional argument is whatever the
** user wants (and specified in the grammar) and is available for
** use by the action routines.
**
** Each call feeds exactly one token to the parser.  The token is looked
** up against the state on top of the stack; pending reductions are
** performed until the token can be shifted, the input is accepted, or
** a syntax error is detected.
**
** Inputs:
** <ul>
** <li> A pointer to the parser (an opaque structure.)
** <li> The major token number.
** <li> The minor token number.
** <li> An optional argument of a grammar-specified type.
** </ul>
**
** Outputs:
** None.
*/
static void sqlite3Fts5Parser(
  void *fts5yyp,                   /* The parser */
  int fts5yymajor,                 /* The major token code number */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor       /* The value for the token */
  sqlite3Fts5ParserARG_PDECL               /* Optional %extra_argument parameter */
){
  fts5YYMINORTYPE fts5yyminorunion;
  fts5YYACTIONTYPE fts5yyact;   /* The parser action. */
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  int fts5yyendofinput;     /* True if we are at the end of input */
#endif
#ifdef fts5YYERRORSYMBOL
  int fts5yyerrorhit = 0;   /* True if fts5yymajor has invoked an error */
#endif
  fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp;  /* The parser */
  sqlite3Fts5ParserCTX_FETCH
  sqlite3Fts5ParserARG_STORE

  assert( fts5yypParser->fts5yytos!=0 );
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  /* Major token code 0 marks the end of the input. */
  fts5yyendofinput = (fts5yymajor==0);
#endif

  fts5yyact = fts5yypParser->fts5yytos->stateno;
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    if( fts5yyact < fts5YY_MIN_REDUCE ){
      fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact);
    }else{
      fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE);
    }
  }
#endif

  /* Each iteration looks up the action for the current state and token.
  ** A reduce loops again with the same token; a shift, an accept or
  ** (without an error symbol) a syntax error ends the loop. */
  while(1){ /* Exit by "break" */
    assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack );
    assert( fts5yyact==fts5yypParser->fts5yytos->stateno );
    fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPE)fts5yymajor,fts5yyact);
    if( fts5yyact >= fts5YY_MIN_REDUCE ){
      unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE; /* Reduce by this rule */
#ifndef NDEBUG
      assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) );
      if( fts5yyTraceFILE ){
        int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
        if( fts5yysize ){
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n",
            fts5yyTracePrompt,
            fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action",
            fts5yypParser->fts5yytos[fts5yysize].stateno);
        }else{
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n",
            fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action");
        }
      }
#endif /* NDEBUG */

      /* Check that the stack is large enough to grow by a single entry
      ** if the RHS of the rule is empty.  This ensures that there is room
      ** enough on the stack to push the LHS value */
      if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){
#ifdef fts5YYTRACKMAXSTACKDEPTH
        if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
          fts5yypParser->fts5yyhwm++;
          assert( fts5yypParser->fts5yyhwm ==
                  (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack));
        }
#endif
#if fts5YYSTACKDEPTH>0
        if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){
          fts5yyStackOverflow(fts5yypParser);
          break;
        }
#else
        if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz-1] ){
          if( fts5yyGrowStack(fts5yypParser) ){
            fts5yyStackOverflow(fts5yypParser);
            break;
          }
        }
#endif
      }
      fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM);
    }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
      fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPE)fts5yymajor,fts5yyminor);
#ifndef fts5YYNOERRORRECOVERY
      /* Every shifted token counts down the error counter, which the
      ** error handling below sets to 3. */
      fts5yypParser->fts5yyerrcnt--;
#endif
      break;
    }else if( fts5yyact==fts5YY_ACCEPT_ACTION ){
      /* Pop one entry first; fts5yy_accept() asserts that the stack is
      ** back at its bottom entry. */
      fts5yypParser->fts5yytos--;
      fts5yy_accept(fts5yypParser);
      return;
    }else{
      assert( fts5yyact == fts5YY_ERROR_ACTION );
      /* Wrap the token value in a minor-type union, the form in which
      ** it is passed to fts5yy_destructor() below. */
      fts5yyminorunion.fts5yy0 = fts5yyminor;
#ifdef fts5YYERRORSYMBOL
      int fts5yymx;
#endif
#ifndef NDEBUG
      if( fts5yyTraceFILE ){
        fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt);
      }
#endif
#ifdef fts5YYERRORSYMBOL
      /* A syntax error has occurred.
      ** The response to an error depends upon whether or not the
      ** grammar defines an error token "ERROR".
      **
      ** This is what we do if the grammar does define ERROR:
      **
      **  * Call the %syntax_error function.
      **
      **  * Begin popping the stack until we enter a state where
      **    it is legal to shift the error symbol, then shift
      **    the error symbol.
      **
      **  * Set the error count to three.
      **
      **  * Begin accepting and shifting new tokens.  No new error
      **    processing will occur until three tokens have been
      **    shifted successfully.
      **
      */
      if( fts5yypParser->fts5yyerrcnt<0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
      }
      fts5yymx = fts5yypParser->fts5yytos->major;
      if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
#ifndef NDEBUG
        if( fts5yyTraceFILE ){
          fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n",
             fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
        }
#endif
        fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yyminorunion);
        fts5yymajor = fts5YYNOCODE;
      }else{
        while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){
          fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno,
                                        fts5YYERRORSYMBOL);
          if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE ) break;
          fts5yy_pop_parser_stack(fts5yypParser);
        }
        if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){
          fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
          fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
          fts5yypParser->fts5yyerrcnt = -1;
#endif
          fts5yymajor = fts5YYNOCODE;
        }else if( fts5yymx!=fts5YYERRORSYMBOL ){
          fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
        }
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yyerrorhit = 1;
      if( fts5yymajor==fts5YYNOCODE ) break;
      fts5yyact = fts5yypParser->fts5yytos->stateno;
#elif defined(fts5YYNOERRORRECOVERY)
      /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
      ** do any kind of error recovery.  Instead, simply invoke the syntax
      ** error routine and continue going as if nothing had happened.
      **
      ** Applications can set this macro (for example inside %include) if
      ** they intend to abandon the parse upon the first syntax error seen.
      */
      fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      break;
#else  /* fts5YYERRORSYMBOL is not defined */
      /* This is what we do if the grammar does not define ERROR:
      **
      **  * Report an error message, and throw away the input token.
      **
      **  * If the input token is $, then fail the parse.
      **
      ** As before, subsequent error messages are suppressed until
      ** three input tokens have been successfully shifted.
      */
      if( fts5yypParser->fts5yyerrcnt<=0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      if( fts5yyendofinput ){
        fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
        fts5yypParser->fts5yyerrcnt = -1;
#endif
      }
      break;
#endif
    }
  }
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fts5yyStackEntry *i;
    char cDiv = '[';
    fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt);
    /* Entry 0 of the stack is skipped; printing starts at index 1. */
    for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){
      fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]);
      cDiv = ' ';
    }
    fprintf(fts5yyTraceFILE,"]\n");
  }
#endif
  return;
}
3000 
3001 /*
3002 ** Return the fallback token corresponding to canonical token iToken, or
3003 ** 0 if iToken has no fallback.
3004 */
3005 static int sqlite3Fts5ParserFallback(int iToken){
3006 #ifdef fts5YYFALLBACK
3007   assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) );
3008   return fts5yyFallback[iToken];
3009 #else
3010   (void)iToken;
3011   return 0;
3012 #endif
3013 }
3014 
3015 #line 1 "fts5_aux.c"
3016 /*
3017 ** 2014 May 31
3018 **
3019 ** The author disclaims copyright to this source code.  In place of
3020 ** a legal notice, here is a blessing:
3021 **
3022 **    May you do good and not evil.
3023 **    May you find forgiveness for yourself and forgive others.
3024 **    May you share freely, never taking more than you give.
3025 **
3026 ******************************************************************************
3027 */
3028 
3029 
3030 /* #include "fts5Int.h" */
3031 #include <math.h>                 /* amalgamator: keep */
3032 
3033 /*
3034 ** Object used to iterate through all "coalesced phrase instances" in
3035 ** a single column of the current row. If the phrase instances in the
3036 ** column being considered do not overlap, this object simply iterates
3037 ** through them. Or, if they do overlap (share one or more tokens in
3038 ** common), each set of overlapping instances is treated as a single
3039 ** match. See documentation for the highlight() auxiliary function for
3040 ** details.
3041 **
3042 ** Usage is:
3043 **
3044 **   for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter);
3045 **      (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter);
3046 **      rc = fts5CInstIterNext(&iter)
3047 **   ){
3048 **     printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd);
3049 **   }
3050 **
3051 */
3052 typedef struct CInstIter CInstIter;
3053 struct CInstIter {
3054   const Fts5ExtensionApi *pApi;   /* API offered by current FTS version */
3055   Fts5Context *pFts;              /* First arg to pass to pApi functions */
3056   int iCol;                       /* Column to search */
3057   int iInst;                      /* Next phrase instance index */
3058   int nInst;                      /* Total number of phrase instances */
3059 
3060   /* Output variables */
3061   int iStart;                     /* First token in coalesced phrase instance */
3062   int iEnd;                       /* Last token in coalesced phrase instance */
3063 };
3064 
3065 /*
3066 ** Advance the iterator to the next coalesced phrase instance. Return
3067 ** an SQLite error code if an error occurs, or SQLITE_OK otherwise.
3068 */
3069 static int fts5CInstIterNext(CInstIter *pIter){
3070   int rc = SQLITE_OK;
3071   pIter->iStart = -1;
3072   pIter->iEnd = -1;
3073 
3074   while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){
3075     int ip; int ic; int io;
3076     rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io);
3077     if( rc==SQLITE_OK ){
3078       if( ic==pIter->iCol ){
3079         int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip);
3080         if( pIter->iStart<0 ){
3081           pIter->iStart = io;
3082           pIter->iEnd = iEnd;
3083         }else if( io<=pIter->iEnd ){
3084           if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd;
3085         }else{
3086           break;
3087         }
3088       }
3089       pIter->iInst++;
3090     }
3091   }
3092 
3093   return rc;
3094 }
3095 
3096 /*
3097 ** Initialize the iterator object indicated by the final parameter to
3098 ** iterate through coalesced phrase instances in column iCol.
3099 */
3100 static int fts5CInstIterInit(
3101   const Fts5ExtensionApi *pApi,
3102   Fts5Context *pFts,
3103   int iCol,
3104   CInstIter *pIter
3105 ){
3106   int rc;
3107 
3108   memset(pIter, 0, sizeof(CInstIter));
3109   pIter->pApi = pApi;
3110   pIter->pFts = pFts;
3111   pIter->iCol = iCol;
3112   rc = pApi->xInstCount(pFts, &pIter->nInst);
3113 
3114   if( rc==SQLITE_OK ){
3115     rc = fts5CInstIterNext(pIter);
3116   }
3117 
3118   return rc;
3119 }
3120 
3121 
3122 
3123 /*************************************************************************
3124 ** Start of highlight() implementation.
3125 */
/*
** State shared between the highlight() and snippet() implementations and
** the tokenizer callback fts5HighlightCb(). The callback copies text from
** zIn[] to zOut, inserting zOpen/zClose around each coalesced phrase
** instance reported by iter.
**
** Range filtering in fts5HighlightCb() is only active when iRangeEnd is
** greater than zero; highlight() leaves both range fields zeroed.
*/
typedef struct HighlightContext HighlightContext;
struct HighlightContext {
  CInstIter iter;                 /* Coalesced Instance Iterator */
  int iPos;                       /* Current token offset in zIn[] */
  int iRangeStart;                /* First token to include */
  int iRangeEnd;                  /* If non-zero, last token to include */
  const char *zOpen;              /* Opening highlight */
  const char *zClose;             /* Closing highlight */
  const char *zIn;                /* Input text */
  int nIn;                        /* Size of input text in bytes */
  int iOff;                       /* Current offset within zIn[] */
  char *zOut;                     /* Output value */
};
3139 
3140 /*
3141 ** Append text to the HighlightContext output string - p->zOut. Argument
3142 ** z points to a buffer containing n bytes of text to append. If n is
3143 ** negative, everything up until the first '\0' is appended to the output.
3144 **
3145 ** If *pRc is set to any value other than SQLITE_OK when this function is
3146 ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered,
3147 ** *pRc is set to an error code before returning.
3148 */
3149 static void fts5HighlightAppend(
3150   int *pRc,
3151   HighlightContext *p,
3152   const char *z, int n
3153 ){
3154   if( *pRc==SQLITE_OK && z ){
3155     if( n<0 ) n = (int)strlen(z);
3156     p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z);
3157     if( p->zOut==0 ) *pRc = SQLITE_NOMEM;
3158   }
3159 }
3160 
3161 /*
3162 ** Tokenizer callback used by implementation of highlight() function.
3163 */
3164 static int fts5HighlightCb(
3165   void *pContext,                 /* Pointer to HighlightContext object */
3166   int tflags,                     /* Mask of FTS5_TOKEN_* flags */
3167   const char *pToken,             /* Buffer containing token */
3168   int nToken,                     /* Size of token in bytes */
3169   int iStartOff,                  /* Start offset of token */
3170   int iEndOff                     /* End offset of token */
3171 ){
3172   HighlightContext *p = (HighlightContext*)pContext;
3173   int rc = SQLITE_OK;
3174   int iPos;
3175 
3176   UNUSED_PARAM2(pToken, nToken);
3177 
3178   if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
3179   iPos = p->iPos++;
3180 
3181   if( p->iRangeEnd>0 ){
3182     if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;
3183     if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff;
3184   }
3185 
3186   if( iPos==p->iter.iStart ){
3187     fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff);
3188     fts5HighlightAppend(&rc, p, p->zOpen, -1);
3189     p->iOff = iStartOff;
3190   }
3191 
3192   if( iPos==p->iter.iEnd ){
3193     if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){
3194       fts5HighlightAppend(&rc, p, p->zOpen, -1);
3195     }
3196     fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
3197     fts5HighlightAppend(&rc, p, p->zClose, -1);
3198     p->iOff = iEndOff;
3199     if( rc==SQLITE_OK ){
3200       rc = fts5CInstIterNext(&p->iter);
3201     }
3202   }
3203 
3204   if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){
3205     fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff);
3206     p->iOff = iEndOff;
3207     if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){
3208       fts5HighlightAppend(&rc, p, p->zClose, -1);
3209     }
3210   }
3211 
3212   return rc;
3213 }
3214 
3215 /*
3216 ** Implementation of highlight() function.
3217 */
3218 static void fts5HighlightFunction(
3219   const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
3220   Fts5Context *pFts,              /* First arg to pass to pApi functions */
3221   sqlite3_context *pCtx,          /* Context for returning result/error */
3222   int nVal,                       /* Number of values in apVal[] array */
3223   sqlite3_value **apVal           /* Array of trailing arguments */
3224 ){
3225   HighlightContext ctx;
3226   int rc;
3227   int iCol;
3228 
3229   if( nVal!=3 ){
3230     const char *zErr = "wrong number of arguments to function highlight()";
3231     sqlite3_result_error(pCtx, zErr, -1);
3232     return;
3233   }
3234 
3235   iCol = sqlite3_value_int(apVal[0]);
3236   memset(&ctx, 0, sizeof(HighlightContext));
3237   ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]);
3238   ctx.zClose = (const char*)sqlite3_value_text(apVal[2]);
3239   rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn);
3240 
3241   if( ctx.zIn ){
3242     if( rc==SQLITE_OK ){
3243       rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter);
3244     }
3245 
3246     if( rc==SQLITE_OK ){
3247       rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
3248     }
3249     fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
3250 
3251     if( rc==SQLITE_OK ){
3252       sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
3253     }
3254     sqlite3_free(ctx.zOut);
3255   }
3256   if( rc!=SQLITE_OK ){
3257     sqlite3_result_error_code(pCtx, rc);
3258   }
3259 }
3260 /*
3261 ** End of highlight() implementation.
3262 **************************************************************************/
3263 
3264 /*
3265 ** Context object passed to the fts5SentenceFinderCb() function.
3266 */
3267 typedef struct Fts5SFinder Fts5SFinder;
3268 struct Fts5SFinder {
3269   int iPos;                       /* Current token position */
3270   int nFirstAlloc;                /* Allocated size of aFirst[] */
3271   int nFirst;                     /* Number of entries in aFirst[] */
3272   int *aFirst;                    /* Array of first token in each sentence */
3273   const char *zDoc;               /* Document being tokenized */
3274 };
3275 
3276 /*
3277 ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if
3278 ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an
3279 ** error occurs.
3280 */
3281 static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){
3282   if( p->nFirstAlloc==p->nFirst ){
3283     int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64;
3284     int *aNew;
3285 
3286     aNew = (int*)sqlite3_realloc64(p->aFirst, nNew*sizeof(int));
3287     if( aNew==0 ) return SQLITE_NOMEM;
3288     p->aFirst = aNew;
3289     p->nFirstAlloc = nNew;
3290   }
3291   p->aFirst[p->nFirst++] = iAdd;
3292   return SQLITE_OK;
3293 }
3294 
3295 /*
3296 ** This function is an xTokenize() callback used by the auxiliary snippet()
3297 ** function. Its job is to identify tokens that are the first in a sentence.
3298 ** For each such token, an entry is added to the SFinder.aFirst[] array.
3299 */
3300 static int fts5SentenceFinderCb(
3301   void *pContext,                 /* Pointer to HighlightContext object */
3302   int tflags,                     /* Mask of FTS5_TOKEN_* flags */
3303   const char *pToken,             /* Buffer containing token */
3304   int nToken,                     /* Size of token in bytes */
3305   int iStartOff,                  /* Start offset of token */
3306   int iEndOff                     /* End offset of token */
3307 ){
3308   int rc = SQLITE_OK;
3309 
3310   UNUSED_PARAM2(pToken, nToken);
3311   UNUSED_PARAM(iEndOff);
3312 
3313   if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
3314     Fts5SFinder *p = (Fts5SFinder*)pContext;
3315     if( p->iPos>0 ){
3316       int i;
3317       char c = 0;
3318       for(i=iStartOff-1; i>=0; i--){
3319         c = p->zDoc[i];
3320         if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break;
3321       }
3322       if( i!=iStartOff-1 && (c=='.' || c==':') ){
3323         rc = fts5SentenceFinderAdd(p, p->iPos);
3324       }
3325     }else{
3326       rc = fts5SentenceFinderAdd(p, 0);
3327     }
3328     p->iPos++;
3329   }
3330   return rc;
3331 }
3332 
3333 static int fts5SnippetScore(
3334   const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
3335   Fts5Context *pFts,              /* First arg to pass to pApi functions */
3336   int nDocsize,                   /* Size of column in tokens */
3337   unsigned char *aSeen,           /* Array with one element per query phrase */
3338   int iCol,                       /* Column to score */
3339   int iPos,                       /* Starting offset to score */
3340   int nToken,                     /* Max tokens per snippet */
3341   int *pnScore,                   /* OUT: Score */
3342   int *piPos                      /* OUT: Adjusted offset */
3343 ){
3344   int rc;
3345   int i;
3346   int ip = 0;
3347   int ic = 0;
3348   int iOff = 0;
3349   int iFirst = -1;
3350   int nInst;
3351   int nScore = 0;
3352   int iLast = 0;
3353   sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken;
3354 
3355   rc = pApi->xInstCount(pFts, &nInst);
3356   for(i=0; i<nInst && rc==SQLITE_OK; i++){
3357     rc = pApi->xInst(pFts, i, &ip, &ic, &iOff);
3358     if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<iEnd ){
3359       nScore += (aSeen[ip] ? 1 : 1000);
3360       aSeen[ip] = 1;
3361       if( iFirst<0 ) iFirst = iOff;
3362       iLast = iOff + pApi->xPhraseSize(pFts, ip);
3363     }
3364   }
3365 
3366   *pnScore = nScore;
3367   if( piPos ){
3368     sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2;
3369     if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken;
3370     if( iAdj<0 ) iAdj = 0;
3371     *piPos = (int)iAdj;
3372   }
3373 
3374   return rc;
3375 }
3376 
3377 /*
3378 ** Return the value in pVal interpreted as utf-8 text. Except, if pVal
3379 ** contains a NULL value, return a pointer to a static string zero
3380 ** bytes in length instead of a NULL pointer.
3381 */
3382 static const char *fts5ValueToText(sqlite3_value *pVal){
3383   const char *zRet = (const char*)sqlite3_value_text(pVal);
3384   return zRet ? zRet : "";
3385 }
3386 
3387 /*
3388 ** Implementation of snippet() function.
3389 */
3390 static void fts5SnippetFunction(
3391   const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
3392   Fts5Context *pFts,              /* First arg to pass to pApi functions */
3393   sqlite3_context *pCtx,          /* Context for returning result/error */
3394   int nVal,                       /* Number of values in apVal[] array */
3395   sqlite3_value **apVal           /* Array of trailing arguments */
3396 ){
3397   HighlightContext ctx;
3398   int rc = SQLITE_OK;             /* Return code */
3399   int iCol;                       /* 1st argument to snippet() */
3400   const char *zEllips;            /* 4th argument to snippet() */
3401   int nToken;                     /* 5th argument to snippet() */
3402   int nInst = 0;                  /* Number of instance matches this row */
3403   int i;                          /* Used to iterate through instances */
3404   int nPhrase;                    /* Number of phrases in query */
3405   unsigned char *aSeen;           /* Array of "seen instance" flags */
3406   int iBestCol;                   /* Column containing best snippet */
3407   int iBestStart = 0;             /* First token of best snippet */
3408   int nBestScore = 0;             /* Score of best snippet */
3409   int nColSize = 0;               /* Total size of iBestCol in tokens */
3410   Fts5SFinder sFinder;            /* Used to find the beginnings of sentences */
3411   int nCol;
3412 
3413   if( nVal!=5 ){
3414     const char *zErr = "wrong number of arguments to function snippet()";
3415     sqlite3_result_error(pCtx, zErr, -1);
3416     return;
3417   }
3418 
3419   nCol = pApi->xColumnCount(pFts);
3420   memset(&ctx, 0, sizeof(HighlightContext));
3421   iCol = sqlite3_value_int(apVal[0]);
3422   ctx.zOpen = fts5ValueToText(apVal[1]);
3423   ctx.zClose = fts5ValueToText(apVal[2]);
3424   zEllips = fts5ValueToText(apVal[3]);
3425   nToken = sqlite3_value_int(apVal[4]);
3426 
3427   iBestCol = (iCol>=0 ? iCol : 0);
3428   nPhrase = pApi->xPhraseCount(pFts);
3429   aSeen = sqlite3_malloc(nPhrase);
3430   if( aSeen==0 ){
3431     rc = SQLITE_NOMEM;
3432   }
3433   if( rc==SQLITE_OK ){
3434     rc = pApi->xInstCount(pFts, &nInst);
3435   }
3436 
3437   memset(&sFinder, 0, sizeof(Fts5SFinder));
3438   for(i=0; i<nCol; i++){
3439     if( iCol<0 || iCol==i ){
3440       int nDoc;
3441       int nDocsize;
3442       int ii;
3443       sFinder.iPos = 0;
3444       sFinder.nFirst = 0;
3445       rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc);
3446       if( rc!=SQLITE_OK ) break;
3447       rc = pApi->xTokenize(pFts,
3448           sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb
3449       );
3450       if( rc!=SQLITE_OK ) break;
3451       rc = pApi->xColumnSize(pFts, i, &nDocsize);
3452       if( rc!=SQLITE_OK ) break;
3453 
3454       for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){
3455         int ip, ic, io;
3456         int iAdj;
3457         int nScore;
3458         int jj;
3459 
3460         rc = pApi->xInst(pFts, ii, &ip, &ic, &io);
3461         if( ic!=i ) continue;
3462         if( io>nDocsize ) rc = FTS5_CORRUPT;
3463         if( rc!=SQLITE_OK ) continue;
3464         memset(aSeen, 0, nPhrase);
3465         rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3466             io, nToken, &nScore, &iAdj
3467         );
3468         if( rc==SQLITE_OK && nScore>nBestScore ){
3469           nBestScore = nScore;
3470           iBestCol = i;
3471           iBestStart = iAdj;
3472           nColSize = nDocsize;
3473         }
3474 
3475         if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){
3476           for(jj=0; jj<(sFinder.nFirst-1); jj++){
3477             if( sFinder.aFirst[jj+1]>io ) break;
3478           }
3479 
3480           if( sFinder.aFirst[jj]<io ){
3481             memset(aSeen, 0, nPhrase);
3482             rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i,
3483               sFinder.aFirst[jj], nToken, &nScore, 0
3484             );
3485 
3486             nScore += (sFinder.aFirst[jj]==0 ? 120 : 100);
3487             if( rc==SQLITE_OK && nScore>nBestScore ){
3488               nBestScore = nScore;
3489               iBestCol = i;
3490               iBestStart = sFinder.aFirst[jj];
3491               nColSize = nDocsize;
3492             }
3493           }
3494         }
3495       }
3496     }
3497   }
3498 
3499   if( rc==SQLITE_OK ){
3500     rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn);
3501   }
3502   if( rc==SQLITE_OK && nColSize==0 ){
3503     rc = pApi->xColumnSize(pFts, iBestCol, &nColSize);
3504   }
3505   if( ctx.zIn ){
3506     if( rc==SQLITE_OK ){
3507       rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter);
3508     }
3509 
3510     ctx.iRangeStart = iBestStart;
3511     ctx.iRangeEnd = iBestStart + nToken - 1;
3512 
3513     if( iBestStart>0 ){
3514       fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3515     }
3516 
3517     /* Advance iterator ctx.iter so that it points to the first coalesced
3518     ** phrase instance at or following position iBestStart. */
3519     while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){
3520       rc = fts5CInstIterNext(&ctx.iter);
3521     }
3522 
3523     if( rc==SQLITE_OK ){
3524       rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb);
3525     }
3526     if( ctx.iRangeEnd>=(nColSize-1) ){
3527       fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff);
3528     }else{
3529       fts5HighlightAppend(&rc, &ctx, zEllips, -1);
3530     }
3531   }
3532   if( rc==SQLITE_OK ){
3533     sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT);
3534   }else{
3535     sqlite3_result_error_code(pCtx, rc);
3536   }
3537   sqlite3_free(ctx.zOut);
3538   sqlite3_free(aSeen);
3539   sqlite3_free(sFinder.aFirst);
3540 }
3541 
3542 /************************************************************************/
3543 
3544 /*
3545 ** The first time the bm25() function is called for a query, an instance
3546 ** of the following structure is allocated and populated.
3547 */
3548 typedef struct Fts5Bm25Data Fts5Bm25Data;
3549 struct Fts5Bm25Data {
3550   int nPhrase;                    /* Number of phrases in query */
3551   double avgdl;                   /* Average number of tokens in each row */
3552   double *aIDF;                   /* IDF for each phrase */
3553   double *aFreq;                  /* Array used to calculate phrase freq. */
3554 };
3555 
3556 /*
3557 ** Callback used by fts5Bm25GetData() to count the number of rows in the
3558 ** table matched by each individual phrase within the query.
3559 */
3560 static int fts5CountCb(
3561   const Fts5ExtensionApi *pApi,
3562   Fts5Context *pFts,
3563   void *pUserData                 /* Pointer to sqlite3_int64 variable */
3564 ){
3565   sqlite3_int64 *pn = (sqlite3_int64*)pUserData;
3566   UNUSED_PARAM2(pApi, pFts);
3567   (*pn)++;
3568   return SQLITE_OK;
3569 }
3570 
3571 /*
3572 ** Set *ppData to point to the Fts5Bm25Data object for the current query.
3573 ** If the object has not already been allocated, allocate and populate it
3574 ** now.
3575 */
3576 static int fts5Bm25GetData(
3577   const Fts5ExtensionApi *pApi,
3578   Fts5Context *pFts,
3579   Fts5Bm25Data **ppData           /* OUT: bm25-data object for this query */
3580 ){
3581   int rc = SQLITE_OK;             /* Return code */
3582   Fts5Bm25Data *p;                /* Object to return */
3583 
3584   p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0);
3585   if( p==0 ){
3586     int nPhrase;                  /* Number of phrases in query */
3587     sqlite3_int64 nRow = 0;       /* Number of rows in table */
3588     sqlite3_int64 nToken = 0;     /* Number of tokens in table */
3589     sqlite3_int64 nByte;          /* Bytes of space to allocate */
3590     int i;
3591 
3592     /* Allocate the Fts5Bm25Data object */
3593     nPhrase = pApi->xPhraseCount(pFts);
3594     nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double);
3595     p = (Fts5Bm25Data*)sqlite3_malloc64(nByte);
3596     if( p==0 ){
3597       rc = SQLITE_NOMEM;
3598     }else{
3599       memset(p, 0, (size_t)nByte);
3600       p->nPhrase = nPhrase;
3601       p->aIDF = (double*)&p[1];
3602       p->aFreq = &p->aIDF[nPhrase];
3603     }
3604 
3605     /* Calculate the average document length for this FTS5 table */
3606     if( rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow);
3607     assert( rc!=SQLITE_OK || nRow>0 );
3608     if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken);
3609     if( rc==SQLITE_OK ) p->avgdl = (double)nToken  / (double)nRow;
3610 
3611     /* Calculate an IDF for each phrase in the query */
3612     for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
3613       sqlite3_int64 nHit = 0;
3614       rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb);
3615       if( rc==SQLITE_OK ){
3616         /* Calculate the IDF (Inverse Document Frequency) for phrase i.
3617         ** This is done using the standard BM25 formula as found on wikipedia:
3618         **
3619         **   IDF = log( (N - nHit + 0.5) / (nHit + 0.5) )
3620         **
3621         ** where "N" is the total number of documents in the set and nHit
3622         ** is the number that contain at least one instance of the phrase
3623         ** under consideration.
3624         **
3625         ** The problem with this is that if (N < 2*nHit), the IDF is
3626         ** negative. Which is undesirable. So the mimimum allowable IDF is
3627         ** (1e-6) - roughly the same as a term that appears in just over
3628         ** half of set of 5,000,000 documents.  */
3629         double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) );
3630         if( idf<=0.0 ) idf = 1e-6;
3631         p->aIDF[i] = idf;
3632       }
3633     }
3634 
3635     if( rc!=SQLITE_OK ){
3636       sqlite3_free(p);
3637     }else{
3638       rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
3639     }
3640     if( rc!=SQLITE_OK ) p = 0;
3641   }
3642   *ppData = p;
3643   return rc;
3644 }
3645 
3646 /*
3647 ** Implementation of bm25() function.
3648 */
3649 static void fts5Bm25Function(
3650   const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
3651   Fts5Context *pFts,              /* First arg to pass to pApi functions */
3652   sqlite3_context *pCtx,          /* Context for returning result/error */
3653   int nVal,                       /* Number of values in apVal[] array */
3654   sqlite3_value **apVal           /* Array of trailing arguments */
3655 ){
3656   const double k1 = 1.2;          /* Constant "k1" from BM25 formula */
3657   const double b = 0.75;          /* Constant "b" from BM25 formula */
3658   int rc;                         /* Error code */
3659   double score = 0.0;             /* SQL function return value */
3660   Fts5Bm25Data *pData;            /* Values allocated/calculated once only */
3661   int i;                          /* Iterator variable */
3662   int nInst = 0;                  /* Value returned by xInstCount() */
3663   double D = 0.0;                 /* Total number of tokens in row */
3664   double *aFreq = 0;              /* Array of phrase freq. for current row */
3665 
3666   /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation)
3667   ** for each phrase in the query for the current row. */
3668   rc = fts5Bm25GetData(pApi, pFts, &pData);
3669   if( rc==SQLITE_OK ){
3670     aFreq = pData->aFreq;
3671     memset(aFreq, 0, sizeof(double) * pData->nPhrase);
3672     rc = pApi->xInstCount(pFts, &nInst);
3673   }
3674   for(i=0; rc==SQLITE_OK && i<nInst; i++){
3675     int ip; int ic; int io;
3676     rc = pApi->xInst(pFts, i, &ip, &ic, &io);
3677     if( rc==SQLITE_OK ){
3678       double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0;
3679       aFreq[ip] += w;
3680     }
3681   }
3682 
3683   /* Figure out the total size of the current row in tokens. */
3684   if( rc==SQLITE_OK ){
3685     int nTok;
3686     rc = pApi->xColumnSize(pFts, -1, &nTok);
3687     D = (double)nTok;
3688   }
3689 
3690   /* Determine and return the BM25 score for the current row. Or, if an
3691   ** error has occurred, throw an exception. */
3692   if( rc==SQLITE_OK ){
3693     for(i=0; i<pData->nPhrase; i++){
3694       score += pData->aIDF[i] * (
3695           ( aFreq[i] * (k1 + 1.0) ) /
3696           ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) )
3697       );
3698     }
3699     sqlite3_result_double(pCtx, -1.0 * score);
3700   }else{
3701     sqlite3_result_error_code(pCtx, rc);
3702   }
3703 }
3704 
3705 static int sqlite3Fts5AuxInit(fts5_api *pApi){
3706   struct Builtin {
3707     const char *zFunc;            /* Function name (nul-terminated) */
3708     void *pUserData;              /* User-data pointer */
3709     fts5_extension_function xFunc;/* Callback function */
3710     void (*xDestroy)(void*);      /* Destructor function */
3711   } aBuiltin [] = {
3712     { "snippet",   0, fts5SnippetFunction, 0 },
3713     { "highlight", 0, fts5HighlightFunction, 0 },
3714     { "bm25",      0, fts5Bm25Function,    0 },
3715   };
3716   int rc = SQLITE_OK;             /* Return code */
3717   int i;                          /* To iterate through builtin functions */
3718 
3719   for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
3720     rc = pApi->xCreateFunction(pApi,
3721         aBuiltin[i].zFunc,
3722         aBuiltin[i].pUserData,
3723         aBuiltin[i].xFunc,
3724         aBuiltin[i].xDestroy
3725     );
3726   }
3727 
3728   return rc;
3729 }
3730 
3731 #line 1 "fts5_buffer.c"
3732 /*
3733 ** 2014 May 31
3734 **
3735 ** The author disclaims copyright to this source code.  In place of
3736 ** a legal notice, here is a blessing:
3737 **
3738 **    May you do good and not evil.
3739 **    May you find forgiveness for yourself and forgive others.
3740 **    May you share freely, never taking more than you give.
3741 **
3742 ******************************************************************************
3743 */
3744 
3745 
3746 
3747 /* #include "fts5Int.h" */
3748 
3749 static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){
3750   if( (u32)pBuf->nSpace<nByte ){
3751     u64 nNew = pBuf->nSpace ? pBuf->nSpace : 64;
3752     u8 *pNew;
3753     while( nNew<nByte ){
3754       nNew = nNew * 2;
3755     }
3756     pNew = sqlite3_realloc64(pBuf->p, nNew);
3757     if( pNew==0 ){
3758       *pRc = SQLITE_NOMEM;
3759       return 1;
3760     }else{
3761       pBuf->nSpace = (int)nNew;
3762       pBuf->p = pNew;
3763     }
3764   }
3765   return 0;
3766 }
3767 
3768 
3769 /*
3770 ** Encode value iVal as an SQLite varint and append it to the buffer object
3771 ** pBuf. If an OOM error occurs, set the error code in p.
3772 */
3773 static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){
3774   if( fts5BufferGrow(pRc, pBuf, 9) ) return;
3775   pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal);
3776 }
3777 
3778 static void sqlite3Fts5Put32(u8 *aBuf, int iVal){
3779   aBuf[0] = (iVal>>24) & 0x00FF;
3780   aBuf[1] = (iVal>>16) & 0x00FF;
3781   aBuf[2] = (iVal>> 8) & 0x00FF;
3782   aBuf[3] = (iVal>> 0) & 0x00FF;
3783 }
3784 
3785 static int sqlite3Fts5Get32(const u8 *aBuf){
3786   return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]);
3787 }
3788 
3789 /*
3790 ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set
3791 ** the error code in p. If an error has already occurred when this function
3792 ** is called, it is a no-op.
3793 */
3794 static void sqlite3Fts5BufferAppendBlob(
3795   int *pRc,
3796   Fts5Buffer *pBuf,
3797   u32 nData,
3798   const u8 *pData
3799 ){
3800   if( nData ){
3801     if( fts5BufferGrow(pRc, pBuf, nData) ) return;
3802     memcpy(&pBuf->p[pBuf->n], pData, nData);
3803     pBuf->n += nData;
3804   }
3805 }
3806 
3807 /*
3808 ** Append the nul-terminated string zStr to the buffer pBuf. This function
3809 ** ensures that the byte following the buffer data is set to 0x00, even
3810 ** though this byte is not included in the pBuf->n count.
3811 */
3812 static void sqlite3Fts5BufferAppendString(
3813   int *pRc,
3814   Fts5Buffer *pBuf,
3815   const char *zStr
3816 ){
3817   int nStr = (int)strlen(zStr);
3818   sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr);
3819   pBuf->n--;
3820 }
3821 
3822 /*
3823 ** Argument zFmt is a printf() style format string. This function performs
3824 ** the printf() style processing, then appends the results to buffer pBuf.
3825 **
3826 ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte
3827 ** following the buffer data is set to 0x00, even though this byte is not
3828 ** included in the pBuf->n count.
3829 */
3830 static void sqlite3Fts5BufferAppendPrintf(
3831   int *pRc,
3832   Fts5Buffer *pBuf,
3833   char *zFmt, ...
3834 ){
3835   if( *pRc==SQLITE_OK ){
3836     char *zTmp;
3837     va_list ap;
3838     va_start(ap, zFmt);
3839     zTmp = sqlite3_vmprintf(zFmt, ap);
3840     va_end(ap);
3841 
3842     if( zTmp==0 ){
3843       *pRc = SQLITE_NOMEM;
3844     }else{
3845       sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp);
3846       sqlite3_free(zTmp);
3847     }
3848   }
3849 }
3850 
3851 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){
3852   char *zRet = 0;
3853   if( *pRc==SQLITE_OK ){
3854     va_list ap;
3855     va_start(ap, zFmt);
3856     zRet = sqlite3_vmprintf(zFmt, ap);
3857     va_end(ap);
3858     if( zRet==0 ){
3859       *pRc = SQLITE_NOMEM;
3860     }
3861   }
3862   return zRet;
3863 }
3864 
3865 
3866 /*
3867 ** Free any buffer allocated by pBuf. Zero the structure before returning.
3868 */
3869 static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){
3870   sqlite3_free(pBuf->p);
3871   memset(pBuf, 0, sizeof(Fts5Buffer));
3872 }
3873 
3874 /*
3875 ** Zero the contents of the buffer object. But do not free the associated
3876 ** memory allocation.
3877 */
3878 static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){
3879   pBuf->n = 0;
3880 }
3881 
3882 /*
3883 ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an
3884 ** the error code in p. If an error has already occurred when this function
3885 ** is called, it is a no-op.
3886 */
3887 static void sqlite3Fts5BufferSet(
3888   int *pRc,
3889   Fts5Buffer *pBuf,
3890   int nData,
3891   const u8 *pData
3892 ){
3893   pBuf->n = 0;
3894   sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData);
3895 }
3896 
/*
** Read the next position from the position list stored in the n byte
** buffer a[]. *pi is the offset of the next byte to read and *piOff is
** the previous position decoded from the list.
**
** Returns 1, with *piOff set to -1, if the end of the buffer has been
** reached or if the record is found to be corrupt. Otherwise returns 0
** with *pi updated to the offset following the data consumed.
**
** A position is a 64-bit value with a 31-bit field in the upper 32 bits
** and a 31-bit field in the lower 32 bits. The values read are handled
** as follows (this mirrors what sqlite3Fts5PoslistSafeAppend() writes):
**
**   * 0x00: *pi is advanced but *piOff is left unchanged.
**
**   * 0x01: the next value read becomes the upper 32 bits of the position.
**     The value after that, less 2, is the lower field. It is corrupt
**     for this final value to be less than 2.
**
**   * Anything else: the value less 2 is added to the lower field of the
**     previous position. The upper field is not modified.
**
** NOTE(review): fts5FastGetVarint32() is a macro defined elsewhere. It
** presumably decodes a 32-bit varint from a[] into iVal and advances i,
** apparently without reference to n - confirm against its definition.
*/
static int sqlite3Fts5PoslistNext64(
  const u8 *a, int n,             /* Buffer containing poslist */
  int *pi,                        /* IN/OUT: Offset within a[] */
  i64 *piOff                      /* IN/OUT: Current offset */
){
  int i = *pi;
  if( i>=n ){
    /* EOF */
    *piOff = -1;
    return 1;
  }else{
    i64 iOff = *piOff;
    u32 iVal;
    fts5FastGetVarint32(a, i, iVal);
    if( iVal<=1 ){
      if( iVal==0 ){
        /* Consume the value without changing the current position */
        *pi = i;
        return 0;
      }
      /* iVal==1: the next value is the new upper 32 bits of the position */
      fts5FastGetVarint32(a, i, iVal);
      iOff = ((i64)iVal) << 32;
      assert( iOff>=0 );
      fts5FastGetVarint32(a, i, iVal);
      if( iVal<2 ){
        /* This is a corrupt record. So stop parsing it here. */
        *piOff = -1;
        return 1;
      }
      *piOff = iOff + ((iVal-2) & 0x7FFFFFFF);
    }else{
      /* Keep the upper field and add the delta (iVal-2) to the lower one */
      *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF);
    }
    *pi = i;
    assert_nc( *piOff>=iOff );
    return 0;
  }
}
3934 
3935 
3936 /*
3937 ** Advance the iterator object passed as the only argument. Return true
3938 ** if the iterator reaches EOF, or false otherwise.
3939 */
3940 static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){
3941   if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){
3942     pIter->bEof = 1;
3943   }
3944   return pIter->bEof;
3945 }
3946 
3947 static int sqlite3Fts5PoslistReaderInit(
3948   const u8 *a, int n,             /* Poslist buffer to iterate through */
3949   Fts5PoslistReader *pIter        /* Iterator object to initialize */
3950 ){
3951   memset(pIter, 0, sizeof(*pIter));
3952   pIter->a = a;
3953   pIter->n = n;
3954   sqlite3Fts5PoslistReaderNext(pIter);
3955   return pIter->bEof;
3956 }
3957 
3958 /*
3959 ** Append position iPos to the position list being accumulated in buffer
3960 ** pBuf, which must be already be large enough to hold the new data.
3961 ** The previous position written to this list is *piPrev. *piPrev is set
3962 ** to iPos before returning.
3963 */
3964 static void sqlite3Fts5PoslistSafeAppend(
3965   Fts5Buffer *pBuf,
3966   i64 *piPrev,
3967   i64 iPos
3968 ){
3969   if( iPos>=*piPrev ){
3970     static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32;
3971     if( (iPos & colmask) != (*piPrev & colmask) ){
3972       pBuf->p[pBuf->n++] = 1;
3973       pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32));
3974       *piPrev = (iPos & colmask);
3975     }
3976     pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2);
3977     *piPrev = iPos;
3978   }
3979 }
3980 
3981 static int sqlite3Fts5PoslistWriterAppend(
3982   Fts5Buffer *pBuf,
3983   Fts5PoslistWriter *pWriter,
3984   i64 iPos
3985 ){
3986   int rc = 0;   /* Initialized only to suppress erroneous warning from Clang */
3987   if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc;
3988   sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos);
3989   return SQLITE_OK;
3990 }
3991 
3992 static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){
3993   void *pRet = 0;
3994   if( *pRc==SQLITE_OK ){
3995     pRet = sqlite3_malloc64(nByte);
3996     if( pRet==0 ){
3997       if( nByte>0 ) *pRc = SQLITE_NOMEM;
3998     }else{
3999       memset(pRet, 0, (size_t)nByte);
4000     }
4001   }
4002   return pRet;
4003 }
4004 
4005 /*
4006 ** Return a nul-terminated copy of the string indicated by pIn. If nIn
4007 ** is non-negative, then it is the length of the string in bytes. Otherwise,
4008 ** the length of the string is determined using strlen().
4009 **
4010 ** It is the responsibility of the caller to eventually free the returned
4011 ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned.
4012 */
4013 static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){
4014   char *zRet = 0;
4015   if( *pRc==SQLITE_OK ){
4016     if( nIn<0 ){
4017       nIn = (int)strlen(pIn);
4018     }
4019     zRet = (char*)sqlite3_malloc(nIn+1);
4020     if( zRet ){
4021       memcpy(zRet, pIn, nIn);
4022       zRet[nIn] = '\0';
4023     }else{
4024       *pRc = SQLITE_NOMEM;
4025     }
4026   }
4027   return zRet;
4028 }
4029 
4030 
/*
** Return true if character 't' may be part of an FTS5 bareword, or false
** otherwise. Characters that may be part of barewords:
**
**   * All non-ASCII characters (any byte with the 0x80 bit set),
**   * The 52 upper and lower case ASCII characters, and
**   * The 10 integer ASCII characters.
**   * The underscore character "_" (0x5F).
**   * The unicode "substitute" character (0x1A).
**
** The value returned is always 0 or 1.
*/
static int sqlite3Fts5IsBareword(char t){
  /* One entry per ASCII code point. The table is static and const so
  ** that it is built once rather than re-initialized on every call. */
  static const unsigned char aBareword[128] = {
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00 .. 0x0F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 1, 0, 0, 0, 0, 0,   /* 0x10 .. 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20 .. 0x2F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30 .. 0x3F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 .. 0x4F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 1,   /* 0x50 .. 0x5F */
    0, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 .. 0x6F */
    1, 1, 1, 1, 1, 1, 1, 1,    1, 1, 1, 0, 0, 0, 0, 0    /* 0x70 .. 0x7F */
  };
  /* Work with an unsigned value so the table index can never be
  ** negative, whatever the signedness of plain char. */
  const unsigned char c = (unsigned char)t;

  if( c & 0x80 ) return 1;
  return aBareword[c];
}
4055 
4056 
4057 /*************************************************************************
4058 */
/*
** A Fts5Termset is a fixed-size hash table of (iIdx, term) pairs. Each
** slot of apHash[] heads a singly linked list of Fts5TermsetEntry
** objects that hash to that slot.
*/
typedef struct Fts5TermsetEntry Fts5TermsetEntry;
struct Fts5TermsetEntry {
  char *pTerm;                    /* Term bytes (not nul-terminated) */
  int nTerm;                      /* Size of pTerm in bytes */
  int iIdx;                       /* Index (main or aPrefix[] entry) */
  Fts5TermsetEntry *pNext;        /* Next entry in the same hash slot */
};

struct Fts5Termset {
  Fts5TermsetEntry *apHash[512];  /* Hash slots, each a list head or NULL */
};
4070 
4071 static int sqlite3Fts5TermsetNew(Fts5Termset **pp){
4072   int rc = SQLITE_OK;
4073   *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset));
4074   return rc;
4075 }
4076 
4077 static int sqlite3Fts5TermsetAdd(
4078   Fts5Termset *p,
4079   int iIdx,
4080   const char *pTerm, int nTerm,
4081   int *pbPresent
4082 ){
4083   int rc = SQLITE_OK;
4084   *pbPresent = 0;
4085   if( p ){
4086     int i;
4087     u32 hash = 13;
4088     Fts5TermsetEntry *pEntry;
4089 
4090     /* Calculate a hash value for this term. This is the same hash checksum
4091     ** used by the fts5_hash.c module. This is not important for correct
4092     ** operation of the module, but is necessary to ensure that some tests
4093     ** designed to produce hash table collisions really do work.  */
4094     for(i=nTerm-1; i>=0; i--){
4095       hash = (hash << 3) ^ hash ^ pTerm[i];
4096     }
4097     hash = (hash << 3) ^ hash ^ iIdx;
4098     hash = hash % ArraySize(p->apHash);
4099 
4100     for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){
4101       if( pEntry->iIdx==iIdx
4102           && pEntry->nTerm==nTerm
4103           && memcmp(pEntry->pTerm, pTerm, nTerm)==0
4104       ){
4105         *pbPresent = 1;
4106         break;
4107       }
4108     }
4109 
4110     if( pEntry==0 ){
4111       pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm);
4112       if( pEntry ){
4113         pEntry->pTerm = (char*)&pEntry[1];
4114         pEntry->nTerm = nTerm;
4115         pEntry->iIdx = iIdx;
4116         memcpy(pEntry->pTerm, pTerm, nTerm);
4117         pEntry->pNext = p->apHash[hash];
4118         p->apHash[hash] = pEntry;
4119       }
4120     }
4121   }
4122 
4123   return rc;
4124 }
4125 
4126 static void sqlite3Fts5TermsetFree(Fts5Termset *p){
4127   if( p ){
4128     u32 i;
4129     for(i=0; i<ArraySize(p->apHash); i++){
4130       Fts5TermsetEntry *pEntry = p->apHash[i];
4131       while( pEntry ){
4132         Fts5TermsetEntry *pDel = pEntry;
4133         pEntry = pEntry->pNext;
4134         sqlite3_free(pDel);
4135       }
4136     }
4137     sqlite3_free(p);
4138   }
4139 }
4140 
4141 #line 1 "fts5_config.c"
4142 /*
4143 ** 2014 Jun 09
4144 **
4145 ** The author disclaims copyright to this source code.  In place of
4146 ** a legal notice, here is a blessing:
4147 **
4148 **    May you do good and not evil.
4149 **    May you find forgiveness for yourself and forgive others.
4150 **    May you share freely, never taking more than you give.
4151 **
4152 ******************************************************************************
4153 **
4154 ** This is an SQLite module implementing full-text search.
4155 */
4156 
4157 
4158 /* #include "fts5Int.h" */
4159 
/*
** Default values for FTS5 configuration settings.
** NOTE(review): the names suggest these are the defaults for the page
** size, automerge, usermerge, crisismerge and hash-size settings -
** confirm against the code that consumes them.
*/
#define FTS5_DEFAULT_PAGE_SIZE   4050
#define FTS5_DEFAULT_AUTOMERGE      4
#define FTS5_DEFAULT_USERMERGE      4
#define FTS5_DEFAULT_CRISISMERGE   16
#define FTS5_DEFAULT_HASHSIZE    (1024*1024)

/* Maximum allowed page size */
#define FTS5_MAX_PAGE_SIZE (64*1024)
4168 
/*
** Return 1 if x is the ASCII space character (0x20), or 0 otherwise. No
** other character is treated as white-space by this function.
*/
static int fts5_iswhitespace(char x){
  if( x==' ' ) return 1;
  return 0;
}
4172 
/*
** Return 1 if x is one of the four characters that may open a quoted
** string - double quote, single quote, '[' or back-tick - or 0 otherwise.
*/
static int fts5_isopenquote(char x){
  switch( x ){
    case '"':
    case '\'':
    case '[':
    case '`':
      return 1;
    default:
      return 0;
  }
}
4176 
/*
** Argument pIn is either NULL or points into a nul-terminated string.
** If it is NULL, NULL is returned. Otherwise, return a pointer to the
** first character at or after pIn that is not a white-space character
** (as defined by fts5_iswhitespace()).
*/
static const char *fts5ConfigSkipWhitespace(const char *pIn){
  const char *z;
  if( pIn==0 ) return 0;
  for(z=pIn; fts5_iswhitespace(*z); z++){}
  return z;
}
4189 
/*
** Argument pIn points into a nul-terminated string. Skip over the run of
** "bareword" characters (see sqlite3Fts5IsBareword()) that begins at pIn
** and return a pointer to the first character following it. If the
** character at pIn is not a bareword character - so that the run is
** empty - NULL is returned instead.
*/
static const char *fts5ConfigSkipBareword(const char *pIn){
  const char *z;
  for(z=pIn; sqlite3Fts5IsBareword(*z); z++){}
  return (z==pIn) ? 0 : z;
}
4201 
/*
** Return 1 if a is one of the ASCII digits '0'..'9', or 0 otherwise.
*/
static int fts5_isdigit(char a){
  if( a<'0' ) return 0;
  if( a>'9' ) return 0;
  return 1;
}
4205 
4206 
4207 
/*
** Argument pIn points to the first character of what is expected to be
** an SQL literal within a nul-terminated string. The forms recognized
** are:
**
**   * the keyword NULL, matched case-insensitively,
**   * a blob literal - x'...' or X'...' - containing an even number of
**     hexadecimal digits,
**   * a string literal enclosed in single quotes, in which two adjacent
**     single quotes stand for one embedded quote, and
**   * a number - an optional '+' or '-' sign, digits, and an optional
**     fraction consisting of '.' followed by at least one digit. At
**     least one digit is required. Exponents are not recognized.
**
** Return a pointer to the first character following the literal, or NULL
** if the text at pIn is not one of the forms above (this includes a
** string literal with no closing quote).
*/
static const char *fts5ConfigSkipLiteral(const char *pIn){
  const char *p = pIn;
  switch( *p ){
    case 'n': case 'N':
      if( sqlite3_strnicmp("null", p, 4)==0 ){
        p = &p[4];
      }else{
        p = 0;
      }
      break;

    case 'x': case 'X':
      p++;
      if( *p=='\'' ){
        p++;
        while( (*p>='a' && *p<='f')
            || (*p>='A' && *p<='F')
            || (*p>='0' && *p<='9')
            ){
          p++;
        }
        /* The two bytes of the x' introducer keep the offset of the closing
        ** quote even exactly when the number of hex digits is even. */
        if( *p=='\'' && 0==((p-pIn)%2) ){
          p++;
        }else{
          p = 0;
        }
      }else{
        p = 0;
      }
      break;

    case '\'':
      p++;
      while( 1 ){
        if( *p=='\0' ){
          /* Reached the end of the input without finding the closing
          ** quote. Checking here, before advancing, ensures the scan never
          ** steps past the nul-terminator. */
          p = 0;
          break;
        }
        if( *p!='\'' ){
          p++;                    /* Ordinary character */
        }else if( p[1]=='\'' ){
          p += 2;                 /* Escaped quote ('') */
        }else{
          p++;                    /* Closing quote */
          break;
        }
      }
      break;

    default:
      /* maybe a number */
      if( *p=='+' || *p=='-' ) p++;
      while( fts5_isdigit(*p) ) p++;

      /* At this point, if the literal was an integer, the parse is
      ** finished. Or, if it is a floating point value, it may continue
      ** with a decimal point followed by one or more digits. */
      if( *p=='.' && fts5_isdigit(p[1]) ){
        p += 2;
        while( fts5_isdigit(*p) ) p++;
      }

      /* Reject the input if nothing was consumed, or if the only thing
      ** consumed was a sign character - a lone "+" or "-" is not a
      ** literal. */
      if( p==pIn || (p==&pIn[1] && (*pIn=='+' || *pIn=='-')) ) p = 0;

      break;
  }

  return p;
}
4270 
/*
** The first character of the string pointed to by argument z is guaranteed
** to be an open-quote character (see function fts5_isopenquote()). The
** matching close-quote is the same character, except that '[' is closed
** by ']'.
**
** This function dequotes the string in place: the opening quote is
** dropped, each pair of adjacent close-quote characters is replaced by a
** single one, and a nul-terminator is written after the last character
** copied.
**
** If the close-quote is found, the value returned is the byte offset
** (within the original text) of the character immediately following it.
** If the end of the string is reached without finding a close-quote,
** everything following the open-quote is treated as the quoted text and
** the value returned is the offset of the original nul-terminator.
*/
static int fts5Dequote(char *z){
  int iRead = 1;                  /* Offset of next input character */
  int iWrite = 0;                 /* Offset of next output character */
  char cClose = z[0];             /* Close-quote character */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  for(;;){
    char c = z[iRead];
    if( c=='\0' ) break;
    if( c!=cClose ){
      /* Ordinary character. Copy it across. */
      iRead++;
    }else if( z[iRead+1]==cClose ){
      /* Characters iRead and iRead+1 form an escaped quote. Consume both
      ** and emit a single quote character. */
      iRead += 2;
    }else{
      /* Character iRead was the close quote. */
      iRead++;
      break;
    }
    z[iWrite++] = c;
  }

  z[iWrite] = '\0';
  return iRead;
}
4315 
/*
** If the string z begins with one of the quote characters '[', single
** quote, double quote or back-tick, remove the SQL-style quoting from it
** in place using fts5Dequote(). If it begins with any other character,
** this routine is a no-op. The first character of z must not be a
** white-space character.
**
** Examples:
**
**     "abc"   becomes   abc
**     'xyz'   becomes   xyz
**     [pqr]   becomes   pqr
**     `mno`   becomes   mno
*/
static void sqlite3Fts5Dequote(char *z){
  assert( 0==fts5_iswhitespace(z[0]) );
  switch( z[0] ){
    case '[':
    case '\'':
    case '"':
    case '`':
      fts5Dequote(z);
      break;
    default:
      /* Not quoted. Leave the string as it is. */
      break;
  }
}
4338 
4339 
/*
** One entry of a name -> value lookup table. Tables of these are scanned
** by fts5ConfigSetEnum(), which stops at the first entry with a NULL
** zName.
*/
struct Fts5Enum {
  const char *zName;              /* Name of the enumeration value */
  int eVal;                       /* Integer value the name maps to */
};
typedef struct Fts5Enum Fts5Enum;
4345 
4346 static int fts5ConfigSetEnum(
4347   const Fts5Enum *aEnum,
4348   const char *zEnum,
4349   int *peVal
4350 ){
4351   int nEnum = (int)strlen(zEnum);
4352   int i;
4353   int iVal = -1;
4354 
4355   for(i=0; aEnum[i].zName; i++){
4356     if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){
4357       if( iVal>=0 ) return SQLITE_ERROR;
4358       iVal = aEnum[i].eVal;
4359     }
4360   }
4361 
4362   *peVal = iVal;
4363   return iVal<0 ? SQLITE_ERROR : SQLITE_OK;
4364 }
4365 
4366 /*
4367 ** Parse a "special" CREATE VIRTUAL TABLE directive and update
4368 ** configuration object pConfig as appropriate.
4369 **
4370 ** If successful, object pConfig is updated and SQLITE_OK returned. If
4371 ** an error occurs, an SQLite error code is returned and an error message
4372 ** may be left in *pzErr. It is the responsibility of the caller to
4373 ** eventually free any such error message using sqlite3_free().
4374 */
4375 static int fts5ConfigParseSpecial(
4376   Fts5Global *pGlobal,
4377   Fts5Config *pConfig,            /* Configuration object to update */
4378   const char *zCmd,               /* Special command to parse */
4379   const char *zArg,               /* Argument to parse */
4380   char **pzErr                    /* OUT: Error message */
4381 ){
4382   int rc = SQLITE_OK;
4383   int nCmd = (int)strlen(zCmd);
4384   if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){
4385     const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES;
4386     const char *p;
4387     int bFirst = 1;
4388     if( pConfig->aPrefix==0 ){
4389       pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte);
4390       if( rc ) return rc;
4391     }
4392 
4393     p = zArg;
4394     while( 1 ){
4395       int nPre = 0;
4396 
4397       while( p[0]==' ' ) p++;
4398       if( bFirst==0 && p[0]==',' ){
4399         p++;
4400         while( p[0]==' ' ) p++;
4401       }else if( p[0]=='\0' ){
4402         break;
4403       }
4404       if( p[0]<'0' || p[0]>'9' ){
4405         *pzErr = sqlite3_mprintf("malformed prefix=... directive");
4406         rc = SQLITE_ERROR;
4407         break;
4408       }
4409 
4410       if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){
4411         *pzErr = sqlite3_mprintf(
4412             "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES
4413         );
4414         rc = SQLITE_ERROR;
4415         break;
4416       }
4417 
4418       while( p[0]>='0' && p[0]<='9' && nPre<1000 ){
4419         nPre = nPre*10 + (p[0] - '0');
4420         p++;
4421       }
4422 
4423       if( nPre<=0 || nPre>=1000 ){
4424         *pzErr = sqlite3_mprintf("prefix length out of range (max 999)");
4425         rc = SQLITE_ERROR;
4426         break;
4427       }
4428 
4429       pConfig->aPrefix[pConfig->nPrefix] = nPre;
4430       pConfig->nPrefix++;
4431       bFirst = 0;
4432     }
4433     assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES );
4434     return rc;
4435   }
4436 
4437   if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){
4438     const char *p = (const char*)zArg;
4439     sqlite3_int64 nArg = strlen(zArg) + 1;
4440     char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg);
4441     char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2);
4442     char *pSpace = pDel;
4443 
4444     if( azArg && pSpace ){
4445       if( pConfig->pTok ){
4446         *pzErr = sqlite3_mprintf("multiple tokenize=... directives");
4447         rc = SQLITE_ERROR;
4448       }else{
4449         for(nArg=0; p && *p; nArg++){
4450           const char *p2 = fts5ConfigSkipWhitespace(p);
4451           if( *p2=='\'' ){
4452             p = fts5ConfigSkipLiteral(p2);
4453           }else{
4454             p = fts5ConfigSkipBareword(p2);
4455           }
4456           if( p ){
4457             memcpy(pSpace, p2, p-p2);
4458             azArg[nArg] = pSpace;
4459             sqlite3Fts5Dequote(pSpace);
4460             pSpace += (p - p2) + 1;
4461             p = fts5ConfigSkipWhitespace(p);
4462           }
4463         }
4464         if( p==0 ){
4465           *pzErr = sqlite3_mprintf("parse error in tokenize directive");
4466           rc = SQLITE_ERROR;
4467         }else{
4468           rc = sqlite3Fts5GetTokenizer(pGlobal,
4469               (const char**)azArg, (int)nArg, pConfig,
4470               pzErr
4471           );
4472         }
4473       }
4474     }
4475 
4476     sqlite3_free(azArg);
4477     sqlite3_free(pDel);
4478     return rc;
4479   }
4480 
4481   if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){
4482     if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
4483       *pzErr = sqlite3_mprintf("multiple content=... directives");
4484       rc = SQLITE_ERROR;
4485     }else{
4486       if( zArg[0] ){
4487         pConfig->eContent = FTS5_CONTENT_EXTERNAL;
4488         pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg);
4489       }else{
4490         pConfig->eContent = FTS5_CONTENT_NONE;
4491       }
4492     }
4493     return rc;
4494   }
4495 
4496   if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){
4497     if( pConfig->zContentRowid ){
4498       *pzErr = sqlite3_mprintf("multiple content_rowid=... directives");
4499       rc = SQLITE_ERROR;
4500     }else{
4501       pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1);
4502     }
4503     return rc;
4504   }
4505 
4506   if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){
4507     if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
4508       *pzErr = sqlite3_mprintf("malformed columnsize=... directive");
4509       rc = SQLITE_ERROR;
4510     }else{
4511       pConfig->bColumnsize = (zArg[0]=='1');
4512     }
4513     return rc;
4514   }
4515 
4516   if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
4517     const Fts5Enum aDetail[] = {
4518       { "none", FTS5_DETAIL_NONE },
4519       { "full", FTS5_DETAIL_FULL },
4520       { "columns", FTS5_DETAIL_COLUMNS },
4521       { 0, 0 }
4522     };
4523 
4524     if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){
4525       *pzErr = sqlite3_mprintf("malformed detail=... directive");
4526     }
4527     return rc;
4528   }
4529 
4530   *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd);
4531   return SQLITE_ERROR;
4532 }
4533 
4534 /*
4535 ** Allocate an instance of the default tokenizer ("simple") at
4536 ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error
4537 ** code if an error occurs.
4538 */
4539 static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){
4540   assert( pConfig->pTok==0 && pConfig->pTokApi==0 );
4541   return sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0);
4542 }
4543 
4544 /*
4545 ** Gobble up the first bareword or quoted word from the input buffer zIn.
4546 ** Return a pointer to the character immediately following the last in
4547 ** the gobbled word if successful, or a NULL pointer otherwise (failed
4548 ** to find close-quote character).
4549 **
4550 ** Before returning, set pzOut to point to a new buffer containing a
4551 ** nul-terminated, dequoted copy of the gobbled word. If the word was
4552 ** quoted, *pbQuoted is also set to 1 before returning.
4553 **
4554 ** If *pRc is other than SQLITE_OK when this function is called, it is
4555 ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this
4556 ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not*
4557 ** set if a parse error (failed to find close quote) occurs.
4558 */
4559 static const char *fts5ConfigGobbleWord(
4560   int *pRc,                       /* IN/OUT: Error code */
4561   const char *zIn,                /* Buffer to gobble string/bareword from */
4562   char **pzOut,                   /* OUT: malloc'd buffer containing str/bw */
4563   int *pbQuoted                   /* OUT: Set to true if dequoting required */
4564 ){
4565   const char *zRet = 0;
4566 
4567   sqlite3_int64 nIn = strlen(zIn);
4568   char *zOut = sqlite3_malloc64(nIn+1);
4569 
4570   assert( *pRc==SQLITE_OK );
4571   *pbQuoted = 0;
4572   *pzOut = 0;
4573 
4574   if( zOut==0 ){
4575     *pRc = SQLITE_NOMEM;
4576   }else{
4577     memcpy(zOut, zIn, (size_t)(nIn+1));
4578     if( fts5_isopenquote(zOut[0]) ){
4579       int ii = fts5Dequote(zOut);
4580       zRet = &zIn[ii];
4581       *pbQuoted = 1;
4582     }else{
4583       zRet = fts5ConfigSkipBareword(zIn);
4584       if( zRet ){
4585         zOut[zRet-zIn] = '\0';
4586       }
4587     }
4588   }
4589 
4590   if( zRet==0 ){
4591     sqlite3_free(zOut);
4592   }else{
4593     *pzOut = zOut;
4594   }
4595 
4596   return zRet;
4597 }
4598 
4599 static int fts5ConfigParseColumn(
4600   Fts5Config *p,
4601   char *zCol,
4602   char *zArg,
4603   char **pzErr
4604 ){
4605   int rc = SQLITE_OK;
4606   if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME)
4607    || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME)
4608   ){
4609     *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol);
4610     rc = SQLITE_ERROR;
4611   }else if( zArg ){
4612     if( 0==sqlite3_stricmp(zArg, "unindexed") ){
4613       p->abUnindexed[p->nCol] = 1;
4614     }else{
4615       *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg);
4616       rc = SQLITE_ERROR;
4617     }
4618   }
4619 
4620   p->azCol[p->nCol++] = zCol;
4621   return rc;
4622 }
4623 
4624 /*
4625 ** Populate the Fts5Config.zContentExprlist string.
4626 */
4627 static int fts5ConfigMakeExprlist(Fts5Config *p){
4628   int i;
4629   int rc = SQLITE_OK;
4630   Fts5Buffer buf = {0, 0, 0};
4631 
4632   sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid);
4633   if( p->eContent!=FTS5_CONTENT_NONE ){
4634     for(i=0; i<p->nCol; i++){
4635       if( p->eContent==FTS5_CONTENT_EXTERNAL ){
4636         sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]);
4637       }else{
4638         sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i);
4639       }
4640     }
4641   }
4642 
4643   assert( p->zContentExprlist==0 );
4644   p->zContentExprlist = (char*)buf.p;
4645   return rc;
4646 }
4647 
4648 /*
4649 ** Arguments nArg/azArg contain the string arguments passed to the xCreate
4650 ** or xConnect method of the virtual table. This function attempts to
4651 ** allocate an instance of Fts5Config containing the results of parsing
4652 ** those arguments.
4653 **
4654 ** If successful, SQLITE_OK is returned and *ppOut is set to point to the
4655 ** new Fts5Config object. If an error occurs, an SQLite error code is
4656 ** returned, *ppOut is set to NULL and an error message may be left in
4657 ** *pzErr. It is the responsibility of the caller to eventually free any
4658 ** such error message using sqlite3_free().
4659 */
4660 static int sqlite3Fts5ConfigParse(
4661   Fts5Global *pGlobal,
4662   sqlite3 *db,
4663   int nArg,                       /* Number of arguments */
4664   const char **azArg,             /* Array of nArg CREATE VIRTUAL TABLE args */
4665   Fts5Config **ppOut,             /* OUT: Results of parse */
4666   char **pzErr                    /* OUT: Error message */
4667 ){
4668   int rc = SQLITE_OK;             /* Return code */
4669   Fts5Config *pRet;               /* New object to return */
4670   int i;
4671   sqlite3_int64 nByte;
4672 
4673   *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config));
4674   if( pRet==0 ) return SQLITE_NOMEM;
4675   memset(pRet, 0, sizeof(Fts5Config));
4676   pRet->db = db;
4677   pRet->iCookie = -1;
4678 
4679   nByte = nArg * (sizeof(char*) + sizeof(u8));
4680   pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte);
4681   pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0;
4682   pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1);
4683   pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1);
4684   pRet->bColumnsize = 1;
4685   pRet->eDetail = FTS5_DETAIL_FULL;
4686 #ifdef SQLITE_DEBUG
4687   pRet->bPrefixIndex = 1;
4688 #endif
4689   if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){
4690     *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName);
4691     rc = SQLITE_ERROR;
4692   }
4693 
4694   for(i=3; rc==SQLITE_OK && i<nArg; i++){
4695     const char *zOrig = azArg[i];
4696     const char *z;
4697     char *zOne = 0;
4698     char *zTwo = 0;
4699     int bOption = 0;
4700     int bMustBeCol = 0;
4701 
4702     z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol);
4703     z = fts5ConfigSkipWhitespace(z);
4704     if( z && *z=='=' ){
4705       bOption = 1;
4706       assert( zOne!=0 );
4707       z++;
4708       if( bMustBeCol ) z = 0;
4709     }
4710     z = fts5ConfigSkipWhitespace(z);
4711     if( z && z[0] ){
4712       int bDummy;
4713       z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy);
4714       if( z && z[0] ) z = 0;
4715     }
4716 
4717     if( rc==SQLITE_OK ){
4718       if( z==0 ){
4719         *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig);
4720         rc = SQLITE_ERROR;
4721       }else{
4722         if( bOption ){
4723           rc = fts5ConfigParseSpecial(pGlobal, pRet,
4724             ALWAYS(zOne)?zOne:"",
4725             zTwo?zTwo:"",
4726             pzErr
4727           );
4728         }else{
4729           rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr);
4730           zOne = 0;
4731         }
4732       }
4733     }
4734 
4735     sqlite3_free(zOne);
4736     sqlite3_free(zTwo);
4737   }
4738 
4739   /* If a tokenizer= option was successfully parsed, the tokenizer has
4740   ** already been allocated. Otherwise, allocate an instance of the default
4741   ** tokenizer (unicode61) now.  */
4742   if( rc==SQLITE_OK && pRet->pTok==0 ){
4743     rc = fts5ConfigDefaultTokenizer(pGlobal, pRet);
4744   }
4745 
4746   /* If no zContent option was specified, fill in the default values. */
4747   if( rc==SQLITE_OK && pRet->zContent==0 ){
4748     const char *zTail = 0;
4749     assert( pRet->eContent==FTS5_CONTENT_NORMAL
4750          || pRet->eContent==FTS5_CONTENT_NONE
4751     );
4752     if( pRet->eContent==FTS5_CONTENT_NORMAL ){
4753       zTail = "content";
4754     }else if( pRet->bColumnsize ){
4755       zTail = "docsize";
4756     }
4757 
4758     if( zTail ){
4759       pRet->zContent = sqlite3Fts5Mprintf(
4760           &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail
4761       );
4762     }
4763   }
4764 
4765   if( rc==SQLITE_OK && pRet->zContentRowid==0 ){
4766     pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1);
4767   }
4768 
4769   /* Formulate the zContentExprlist text */
4770   if( rc==SQLITE_OK ){
4771     rc = fts5ConfigMakeExprlist(pRet);
4772   }
4773 
4774   if( rc!=SQLITE_OK ){
4775     sqlite3Fts5ConfigFree(pRet);
4776     *ppOut = 0;
4777   }
4778   return rc;
4779 }
4780 
4781 /*
4782 ** Free the configuration object passed as the only argument.
4783 */
4784 static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){
4785   if( pConfig ){
4786     int i;
4787     if( pConfig->pTok ){
4788       pConfig->pTokApi->xDelete(pConfig->pTok);
4789     }
4790     sqlite3_free(pConfig->zDb);
4791     sqlite3_free(pConfig->zName);
4792     for(i=0; i<pConfig->nCol; i++){
4793       sqlite3_free(pConfig->azCol[i]);
4794     }
4795     sqlite3_free(pConfig->azCol);
4796     sqlite3_free(pConfig->aPrefix);
4797     sqlite3_free(pConfig->zRank);
4798     sqlite3_free(pConfig->zRankArgs);
4799     sqlite3_free(pConfig->zContent);
4800     sqlite3_free(pConfig->zContentRowid);
4801     sqlite3_free(pConfig->zContentExprlist);
4802     sqlite3_free(pConfig);
4803   }
4804 }
4805 
4806 /*
4807 ** Call sqlite3_declare_vtab() based on the contents of the configuration
4808 ** object passed as the only argument. Return SQLITE_OK if successful, or
4809 ** an SQLite error code if an error occurs.
4810 */
4811 static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){
4812   int i;
4813   int rc = SQLITE_OK;
4814   char *zSql;
4815 
4816   zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x(");
4817   for(i=0; zSql && i<pConfig->nCol; i++){
4818     const char *zSep = (i==0?"":", ");
4819     zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]);
4820   }
4821   zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)",
4822       zSql, pConfig->zName, FTS5_RANK_NAME
4823   );
4824 
4825   assert( zSql || rc==SQLITE_NOMEM );
4826   if( zSql ){
4827     rc = sqlite3_declare_vtab(pConfig->db, zSql);
4828     sqlite3_free(zSql);
4829   }
4830 
4831   return rc;
4832 }
4833 
4834 /*
4835 ** Tokenize the text passed via the second and third arguments.
4836 **
4837 ** The callback is invoked once for each token in the input text. The
4838 ** arguments passed to it are, in order:
4839 **
4840 **     void *pCtx          // Copy of 4th argument to sqlite3Fts5Tokenize()
4841 **     const char *pToken  // Pointer to buffer containing token
4842 **     int nToken          // Size of token in bytes
4843 **     int iStart          // Byte offset of start of token within input text
4844 **     int iEnd            // Byte offset of end of token within input text
4845 **     int iPos            // Position of token in input (first token is 0)
4846 **
4847 ** If the callback returns a non-zero value the tokenization is abandoned
4848 ** and no further callbacks are issued.
4849 **
4850 ** This function returns SQLITE_OK if successful or an SQLite error code
4851 ** if an error occurs. If the tokenization was abandoned early because
4852 ** the callback returned SQLITE_DONE, this is not an error and this function
4853 ** still returns SQLITE_OK. Or, if the tokenization was abandoned early
4854 ** because the callback returned another non-zero value, it is assumed
4855 ** to be an SQLite error code and returned to the caller.
4856 */
4857 static int sqlite3Fts5Tokenize(
4858   Fts5Config *pConfig,            /* FTS5 Configuration object */
4859   int flags,                      /* FTS5_TOKENIZE_* flags */
4860   const char *pText, int nText,   /* Text to tokenize */
4861   void *pCtx,                     /* Context passed to xToken() */
4862   int (*xToken)(void*, int, const char*, int, int, int)    /* Callback */
4863 ){
4864   if( pText==0 ) return SQLITE_OK;
4865   return pConfig->pTokApi->xTokenize(
4866       pConfig->pTok, pCtx, flags, pText, nText, xToken
4867   );
4868 }
4869 
/*
** Argument pIn points to the first character in what is expected to be
** a comma-separated list of one or more SQL literals followed by a ')'
** character. White-space is permitted around each literal.
**
** If the input has this form, return a pointer to the ')'. Otherwise,
** return NULL to indicate a parse error.
*/
static const char *fts5ConfigSkipArgs(const char *pIn){
  const char *p = pIn;

  for(;;){
    p = fts5ConfigSkipLiteral( fts5ConfigSkipWhitespace(p) );
    p = fts5ConfigSkipWhitespace(p);
    if( p==0 ) return 0;          /* Not a literal */
    if( *p==')' ) return p;       /* End of list */
    if( *p!=',' ) return 0;       /* Unexpected character */
    p++;
  }
}
4893 
4894 /*
4895 ** Parameter zIn contains a rank() function specification. The format of
4896 ** this is:
4897 **
4898 **   + Bareword (function name)
4899 **   + Open parenthesis - "("
4900 **   + Zero or more SQL literals in a comma separated list
4901 **   + Close parenthesis - ")"
4902 */
4903 static int sqlite3Fts5ConfigParseRank(
4904   const char *zIn,                /* Input string */
4905   char **pzRank,                  /* OUT: Rank function name */
4906   char **pzRankArgs               /* OUT: Rank function arguments */
4907 ){
4908   const char *p = zIn;
4909   const char *pRank;
4910   char *zRank = 0;
4911   char *zRankArgs = 0;
4912   int rc = SQLITE_OK;
4913 
4914   *pzRank = 0;
4915   *pzRankArgs = 0;
4916 
4917   if( p==0 ){
4918     rc = SQLITE_ERROR;
4919   }else{
4920     p = fts5ConfigSkipWhitespace(p);
4921     pRank = p;
4922     p = fts5ConfigSkipBareword(p);
4923 
4924     if( p ){
4925       zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank);
4926       if( zRank ) memcpy(zRank, pRank, p-pRank);
4927     }else{
4928       rc = SQLITE_ERROR;
4929     }
4930 
4931     if( rc==SQLITE_OK ){
4932       p = fts5ConfigSkipWhitespace(p);
4933       if( *p!='(' ) rc = SQLITE_ERROR;
4934       p++;
4935     }
4936     if( rc==SQLITE_OK ){
4937       const char *pArgs;
4938       p = fts5ConfigSkipWhitespace(p);
4939       pArgs = p;
4940       if( *p!=')' ){
4941         p = fts5ConfigSkipArgs(p);
4942         if( p==0 ){
4943           rc = SQLITE_ERROR;
4944         }else{
4945           zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs);
4946           if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs);
4947         }
4948       }
4949     }
4950   }
4951 
4952   if( rc!=SQLITE_OK ){
4953     sqlite3_free(zRank);
4954     assert( zRankArgs==0 );
4955   }else{
4956     *pzRank = zRank;
4957     *pzRankArgs = zRankArgs;
4958   }
4959   return rc;
4960 }
4961 
4962 static int sqlite3Fts5ConfigSetValue(
4963   Fts5Config *pConfig,
4964   const char *zKey,
4965   sqlite3_value *pVal,
4966   int *pbBadkey
4967 ){
4968   int rc = SQLITE_OK;
4969 
4970   if( 0==sqlite3_stricmp(zKey, "pgsz") ){
4971     int pgsz = 0;
4972     if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4973       pgsz = sqlite3_value_int(pVal);
4974     }
4975     if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE ){
4976       *pbBadkey = 1;
4977     }else{
4978       pConfig->pgsz = pgsz;
4979     }
4980   }
4981 
4982   else if( 0==sqlite3_stricmp(zKey, "hashsize") ){
4983     int nHashSize = -1;
4984     if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4985       nHashSize = sqlite3_value_int(pVal);
4986     }
4987     if( nHashSize<=0 ){
4988       *pbBadkey = 1;
4989     }else{
4990       pConfig->nHashSize = nHashSize;
4991     }
4992   }
4993 
4994   else if( 0==sqlite3_stricmp(zKey, "automerge") ){
4995     int nAutomerge = -1;
4996     if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
4997       nAutomerge = sqlite3_value_int(pVal);
4998     }
4999     if( nAutomerge<0 || nAutomerge>64 ){
5000       *pbBadkey = 1;
5001     }else{
5002       if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE;
5003       pConfig->nAutomerge = nAutomerge;
5004     }
5005   }
5006 
5007   else if( 0==sqlite3_stricmp(zKey, "usermerge") ){
5008     int nUsermerge = -1;
5009     if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
5010       nUsermerge = sqlite3_value_int(pVal);
5011     }
5012     if( nUsermerge<2 || nUsermerge>16 ){
5013       *pbBadkey = 1;
5014     }else{
5015       pConfig->nUsermerge = nUsermerge;
5016     }
5017   }
5018 
5019   else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){
5020     int nCrisisMerge = -1;
5021     if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){
5022       nCrisisMerge = sqlite3_value_int(pVal);
5023     }
5024     if( nCrisisMerge<0 ){
5025       *pbBadkey = 1;
5026     }else{
5027       if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
5028       if( nCrisisMerge>=FTS5_MAX_SEGMENT ) nCrisisMerge = FTS5_MAX_SEGMENT-1;
5029       pConfig->nCrisisMerge = nCrisisMerge;
5030     }
5031   }
5032 
5033   else if( 0==sqlite3_stricmp(zKey, "rank") ){
5034     const char *zIn = (const char*)sqlite3_value_text(pVal);
5035     char *zRank;
5036     char *zRankArgs;
5037     rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs);
5038     if( rc==SQLITE_OK ){
5039       sqlite3_free(pConfig->zRank);
5040       sqlite3_free(pConfig->zRankArgs);
5041       pConfig->zRank = zRank;
5042       pConfig->zRankArgs = zRankArgs;
5043     }else if( rc==SQLITE_ERROR ){
5044       rc = SQLITE_OK;
5045       *pbBadkey = 1;
5046     }
5047   }else{
5048     *pbBadkey = 1;
5049   }
5050   return rc;
5051 }
5052 
5053 /*
5054 ** Load the contents of the %_config table into memory.
5055 */
5056 static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){
5057   const char *zSelect = "SELECT k, v FROM %Q.'%q_config'";
5058   char *zSql;
5059   sqlite3_stmt *p = 0;
5060   int rc = SQLITE_OK;
5061   int iVersion = 0;
5062 
5063   /* Set default values */
5064   pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE;
5065   pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE;
5066   pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE;
5067   pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE;
5068   pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE;
5069 
5070   zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName);
5071   if( zSql ){
5072     rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0);
5073     sqlite3_free(zSql);
5074   }
5075 
5076   assert( rc==SQLITE_OK || p==0 );
5077   if( rc==SQLITE_OK ){
5078     while( SQLITE_ROW==sqlite3_step(p) ){
5079       const char *zK = (const char*)sqlite3_column_text(p, 0);
5080       sqlite3_value *pVal = sqlite3_column_value(p, 1);
5081       if( 0==sqlite3_stricmp(zK, "version") ){
5082         iVersion = sqlite3_value_int(pVal);
5083       }else{
5084         int bDummy = 0;
5085         sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy);
5086       }
5087     }
5088     rc = sqlite3_finalize(p);
5089   }
5090 
5091   if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){
5092     rc = SQLITE_ERROR;
5093     if( pConfig->pzErrmsg ){
5094       assert( 0==*pConfig->pzErrmsg );
5095       *pConfig->pzErrmsg = sqlite3_mprintf(
5096           "invalid fts5 file format (found %d, expected %d) - run 'rebuild'",
5097           iVersion, FTS5_CURRENT_VERSION
5098       );
5099     }
5100   }
5101 
5102   if( rc==SQLITE_OK ){
5103     pConfig->iCookie = iCookie;
5104   }
5105   return rc;
5106 }
5107 
5108 #line 1 "fts5_expr.c"
5109 /*
5110 ** 2014 May 31
5111 **
5112 ** The author disclaims copyright to this source code.  In place of
5113 ** a legal notice, here is a blessing:
5114 **
5115 **    May you do good and not evil.
5116 **    May you find forgiveness for yourself and forgive others.
5117 **    May you share freely, never taking more than you give.
5118 **
5119 ******************************************************************************
5120 **
5121 */
5122 
5123 
5124 
5125 /* #include "fts5Int.h" */
5126 /* #include "fts5parse.h" */
5127 
5128 /*
5129 ** All token types in the generated fts5parse.h file are greater than 0.
5130 */
5131 #define FTS5_EOF 0
5132 
5133 #define FTS5_LARGEST_INT64  (0xffffffff|(((i64)0x7fffffff)<<32))
5134 
5135 typedef struct Fts5ExprTerm Fts5ExprTerm;
5136 
5137 /*
5138 ** Functions generated by lemon from fts5parse.y.
5139 */
5140 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64));
5141 static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*));
5142 static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*);
5143 #ifndef NDEBUG
5144 #include <stdio.h>
5145 static void sqlite3Fts5ParserTrace(FILE*, char*);
5146 #endif
5147 static int sqlite3Fts5ParserFallback(int);
5148 
5149 
/*
** A compiled FTS5 query expression, as assembled by sqlite3Fts5ExprNew()
** and released by sqlite3Fts5ExprFree().
*/
struct Fts5Expr {
  Fts5Index *pIndex;              /* Set to 0 by sqlite3Fts5ExprNew() */
  Fts5Config *pConfig;            /* Configuration passed to ExprNew() */
  Fts5ExprNode *pRoot;            /* Root node of the expression tree */
  int bDesc;                      /* Iterate in descending rowid order */
  int nPhrase;                    /* Number of phrases in expression */
  Fts5ExprPhrase **apExprPhrase;  /* Pointers to phrase objects */
};
5158 
5159 /*
5160 ** eType:
5161 **   Expression node type. Always one of:
5162 **
5163 **       FTS5_AND                 (nChild, apChild valid)
5164 **       FTS5_OR                  (nChild, apChild valid)
5165 **       FTS5_NOT                 (nChild, apChild valid)
5166 **       FTS5_STRING              (pNear valid)
5167 **       FTS5_TERM                (pNear valid)
5168 */
5169 struct Fts5ExprNode {
5170   int eType;                      /* Node type */
5171   int bEof;                       /* True at EOF */
5172   int bNomatch;                   /* True if entry is not a match */
5173 
5174   /* Next method for this node. */
5175   int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64);
5176 
5177   i64 iRowid;                     /* Current rowid */
5178   Fts5ExprNearset *pNear;         /* For FTS5_STRING - cluster of phrases */
5179 
5180   /* Child nodes. For a NOT node, this array always contains 2 entries. For
5181   ** AND or OR nodes, it contains 2 or more entries.  */
5182   int nChild;                     /* Number of child nodes */
5183   Fts5ExprNode *apChild[1];       /* Array of child nodes */
5184 };
5185 
/* True if node p is one of the two node types for which pNear is valid
** (FTS5_TERM or FTS5_STRING). Argument p is evaluated twice. */
#define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING)

/*
** Invoke the xNext method of an Fts5ExprNode object. This macro should be
** used as if it has the same signature as the xNext() methods themselves.
** Note that argument (b), the node, is evaluated twice - do not pass an
** expression with side effects.
*/
#define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d))
5193 
5194 /*
5195 ** An instance of the following structure represents a single search term
5196 ** or term prefix.
5197 */
5198 struct Fts5ExprTerm {
5199   u8 bPrefix;                     /* True for a prefix term */
5200   u8 bFirst;                      /* True if token must be first in column */
5201   char *zTerm;                    /* nul-terminated term */
5202   Fts5IndexIter *pIter;           /* Iterator for this term */
5203   Fts5ExprTerm *pSynonym;         /* Pointer to first in list of synonyms */
5204 };
5205 
5206 /*
5207 ** A phrase. One or more terms that must appear in a contiguous sequence
5208 ** within a document for it to match.
5209 */
5210 struct Fts5ExprPhrase {
5211   Fts5ExprNode *pNode;            /* FTS5_STRING node this phrase is part of */
5212   Fts5Buffer poslist;             /* Current position list */
5213   int nTerm;                      /* Number of entries in aTerm[] */
5214   Fts5ExprTerm aTerm[1];          /* Terms that make up this phrase */
5215 };
5216 
5217 /*
5218 ** One or more phrases that must appear within a certain token distance of
5219 ** each other within each matching document.
5220 */
5221 struct Fts5ExprNearset {
5222   int nNear;                      /* NEAR parameter */
5223   Fts5Colset *pColset;            /* Columns to search (NULL -> all columns) */
5224   int nPhrase;                    /* Number of entries in aPhrase[] array */
5225   Fts5ExprPhrase *apPhrase[1];    /* Array of phrase pointers */
5226 };
5227 
5228 
5229 /*
5230 ** Parse context.
5231 */
5232 struct Fts5Parse {
5233   Fts5Config *pConfig;
5234   char *zErr;
5235   int rc;
5236   int nPhrase;                    /* Size of apPhrase array */
5237   Fts5ExprPhrase **apPhrase;      /* Array of all phrases */
5238   Fts5ExprNode *pExpr;            /* Result of a successful parse */
5239   int bPhraseToAnd;               /* Convert "a+b" to "a AND b" */
5240 };
5241 
5242 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){
5243   va_list ap;
5244   va_start(ap, zFmt);
5245   if( pParse->rc==SQLITE_OK ){
5246     assert( pParse->zErr==0 );
5247     pParse->zErr = sqlite3_vmprintf(zFmt, ap);
5248     pParse->rc = SQLITE_ERROR;
5249   }
5250   va_end(ap);
5251 }
5252 
/*
** Return 1 if character t is treated as whitespace by the FTS5 expression
** tokenizer (space, tab, newline or carriage return), or 0 otherwise.
*/
static int fts5ExprIsspace(char t){
  switch( t ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
5256 
5257 /*
5258 ** Read the first token from the nul-terminated string at *pz.
5259 */
5260 static int fts5ExprGetToken(
5261   Fts5Parse *pParse,
5262   const char **pz,                /* IN/OUT: Pointer into buffer */
5263   Fts5Token *pToken
5264 ){
5265   const char *z = *pz;
5266   int tok;
5267 
5268   /* Skip past any whitespace */
5269   while( fts5ExprIsspace(*z) ) z++;
5270 
5271   pToken->p = z;
5272   pToken->n = 1;
5273   switch( *z ){
5274     case '(':  tok = FTS5_LP;    break;
5275     case ')':  tok = FTS5_RP;    break;
5276     case '{':  tok = FTS5_LCP;   break;
5277     case '}':  tok = FTS5_RCP;   break;
5278     case ':':  tok = FTS5_COLON; break;
5279     case ',':  tok = FTS5_COMMA; break;
5280     case '+':  tok = FTS5_PLUS;  break;
5281     case '*':  tok = FTS5_STAR;  break;
5282     case '-':  tok = FTS5_MINUS; break;
5283     case '^':  tok = FTS5_CARET; break;
5284     case '\0': tok = FTS5_EOF;   break;
5285 
5286     case '"': {
5287       const char *z2;
5288       tok = FTS5_STRING;
5289 
5290       for(z2=&z[1]; 1; z2++){
5291         if( z2[0]=='"' ){
5292           z2++;
5293           if( z2[0]!='"' ) break;
5294         }
5295         if( z2[0]=='\0' ){
5296           sqlite3Fts5ParseError(pParse, "unterminated string");
5297           return FTS5_EOF;
5298         }
5299       }
5300       pToken->n = (z2 - z);
5301       break;
5302     }
5303 
5304     default: {
5305       const char *z2;
5306       if( sqlite3Fts5IsBareword(z[0])==0 ){
5307         sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z);
5308         return FTS5_EOF;
5309       }
5310       tok = FTS5_STRING;
5311       for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++);
5312       pToken->n = (z2 - z);
5313       if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 )  tok = FTS5_OR;
5314       if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT;
5315       if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND;
5316       break;
5317     }
5318   }
5319 
5320   *pz = &pToken->p[pToken->n];
5321   return tok;
5322 }
5323 
5324 static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64((sqlite3_int64)t);}
/* Memory release callback passed to sqlite3Fts5ParserFree(). */
static void fts5ParseFree(void *p){
  sqlite3_free(p);
}
5326 
5327 static int sqlite3Fts5ExprNew(
5328   Fts5Config *pConfig,            /* FTS5 Configuration */
5329   int bPhraseToAnd,
5330   int iCol,
5331   const char *zExpr,              /* Expression text */
5332   Fts5Expr **ppNew,
5333   char **pzErr
5334 ){
5335   Fts5Parse sParse;
5336   Fts5Token token;
5337   const char *z = zExpr;
5338   int t;                          /* Next token type */
5339   void *pEngine;
5340   Fts5Expr *pNew;
5341 
5342   *ppNew = 0;
5343   *pzErr = 0;
5344   memset(&sParse, 0, sizeof(sParse));
5345   sParse.bPhraseToAnd = bPhraseToAnd;
5346   pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc);
5347   if( pEngine==0 ){ return SQLITE_NOMEM; }
5348   sParse.pConfig = pConfig;
5349 
5350   do {
5351     t = fts5ExprGetToken(&sParse, &z, &token);
5352     sqlite3Fts5Parser(pEngine, t, token, &sParse);
5353   }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF );
5354   sqlite3Fts5ParserFree(pEngine, fts5ParseFree);
5355 
5356   /* If the LHS of the MATCH expression was a user column, apply the
5357   ** implicit column-filter.  */
5358   if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){
5359     int n = sizeof(Fts5Colset);
5360     Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n);
5361     if( pColset ){
5362       pColset->nCol = 1;
5363       pColset->aiCol[0] = iCol;
5364       sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset);
5365     }
5366   }
5367 
5368   assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 );
5369   if( sParse.rc==SQLITE_OK ){
5370     *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr));
5371     if( pNew==0 ){
5372       sParse.rc = SQLITE_NOMEM;
5373       sqlite3Fts5ParseNodeFree(sParse.pExpr);
5374     }else{
5375       if( !sParse.pExpr ){
5376         const int nByte = sizeof(Fts5ExprNode);
5377         pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte);
5378         if( pNew->pRoot ){
5379           pNew->pRoot->bEof = 1;
5380         }
5381       }else{
5382         pNew->pRoot = sParse.pExpr;
5383       }
5384       pNew->pIndex = 0;
5385       pNew->pConfig = pConfig;
5386       pNew->apExprPhrase = sParse.apPhrase;
5387       pNew->nPhrase = sParse.nPhrase;
5388       pNew->bDesc = 0;
5389       sParse.apPhrase = 0;
5390     }
5391   }else{
5392     sqlite3Fts5ParseNodeFree(sParse.pExpr);
5393   }
5394 
5395   sqlite3_free(sParse.apPhrase);
5396   *pzErr = sParse.zErr;
5397   return sParse.rc;
5398 }
5399 
5400 /*
5401 ** This function is only called when using the special 'trigram' tokenizer.
5402 ** Argument zText contains the text of a LIKE or GLOB pattern matched
5403 ** against column iCol. This function creates and compiles an FTS5 MATCH
5404 ** expression that will match a superset of the rows matched by the LIKE or
5405 ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error
5406 ** code.
5407 */
5408 static int sqlite3Fts5ExprPattern(
5409   Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp
5410 ){
5411   i64 nText = strlen(zText);
5412   char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1);
5413   int rc = SQLITE_OK;
5414 
5415   if( zExpr==0 ){
5416     rc = SQLITE_NOMEM;
5417   }else{
5418     char aSpec[3];
5419     int iOut = 0;
5420     int i = 0;
5421     int iFirst = 0;
5422 
5423     if( bGlob==0 ){
5424       aSpec[0] = '_';
5425       aSpec[1] = '%';
5426       aSpec[2] = 0;
5427     }else{
5428       aSpec[0] = '*';
5429       aSpec[1] = '?';
5430       aSpec[2] = '[';
5431     }
5432 
5433     while( i<=nText ){
5434       if( i==nText
5435        || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2]
5436       ){
5437         if( i-iFirst>=3 ){
5438           int jj;
5439           zExpr[iOut++] = '"';
5440           for(jj=iFirst; jj<i; jj++){
5441             zExpr[iOut++] = zText[jj];
5442             if( zText[jj]=='"' ) zExpr[iOut++] = '"';
5443           }
5444           zExpr[iOut++] = '"';
5445           zExpr[iOut++] = ' ';
5446         }
5447         if( zText[i]==aSpec[2] ){
5448           i += 2;
5449           if( zText[i-1]=='^' ) i++;
5450           while( i<nText && zText[i]!=']' ) i++;
5451         }
5452         iFirst = i+1;
5453       }
5454       i++;
5455     }
5456     if( iOut>0 ){
5457       int bAnd = 0;
5458       if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
5459         bAnd = 1;
5460         if( pConfig->eDetail==FTS5_DETAIL_NONE ){
5461           iCol = pConfig->nCol;
5462         }
5463       }
5464       zExpr[iOut] = '\0';
5465       rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg);
5466     }else{
5467       *pp = 0;
5468     }
5469     sqlite3_free(zExpr);
5470   }
5471 
5472   return rc;
5473 }
5474 
5475 /*
5476 ** Free the expression node object passed as the only argument.
5477 */
5478 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){
5479   if( p ){
5480     int i;
5481     for(i=0; i<p->nChild; i++){
5482       sqlite3Fts5ParseNodeFree(p->apChild[i]);
5483     }
5484     sqlite3Fts5ParseNearsetFree(p->pNear);
5485     sqlite3_free(p);
5486   }
5487 }
5488 
5489 /*
5490 ** Free the expression object passed as the only argument.
5491 */
5492 static void sqlite3Fts5ExprFree(Fts5Expr *p){
5493   if( p ){
5494     sqlite3Fts5ParseNodeFree(p->pRoot);
5495     sqlite3_free(p->apExprPhrase);
5496     sqlite3_free(p);
5497   }
5498 }
5499 
5500 static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){
5501   Fts5Parse sParse;
5502   memset(&sParse, 0, sizeof(sParse));
5503 
5504   if( *pp1 ){
5505     Fts5Expr *p1 = *pp1;
5506     int nPhrase = p1->nPhrase + p2->nPhrase;
5507 
5508     p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND, p1->pRoot, p2->pRoot,0);
5509     p2->pRoot = 0;
5510 
5511     if( sParse.rc==SQLITE_OK ){
5512       Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_realloc(
5513           p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*)
5514       );
5515       if( ap==0 ){
5516         sParse.rc = SQLITE_NOMEM;
5517       }else{
5518         int i;
5519         memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*));
5520         for(i=0; i<p2->nPhrase; i++){
5521           ap[i] = p2->apExprPhrase[i];
5522         }
5523         p1->nPhrase = nPhrase;
5524         p1->apExprPhrase = ap;
5525       }
5526     }
5527     sqlite3_free(p2->apExprPhrase);
5528     sqlite3_free(p2);
5529   }else{
5530     *pp1 = p2;
5531   }
5532 
5533   return sParse.rc;
5534 }
5535 
5536 /*
5537 ** Argument pTerm must be a synonym iterator. Return the current rowid
5538 ** that it points to.
5539 */
5540 static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){
5541   i64 iRet = 0;
5542   int bRetValid = 0;
5543   Fts5ExprTerm *p;
5544 
5545   assert( pTerm );
5546   assert( pTerm->pSynonym );
5547   assert( bDesc==0 || bDesc==1 );
5548   for(p=pTerm; p; p=p->pSynonym){
5549     if( 0==sqlite3Fts5IterEof(p->pIter) ){
5550       i64 iRowid = p->pIter->iRowid;
5551       if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){
5552         iRet = iRowid;
5553         bRetValid = 1;
5554       }
5555     }
5556   }
5557 
5558   if( pbEof && bRetValid==0 ) *pbEof = 1;
5559   return iRet;
5560 }
5561 
5562 /*
5563 ** Argument pTerm must be a synonym iterator.
5564 */
5565 static int fts5ExprSynonymList(
5566   Fts5ExprTerm *pTerm,
5567   i64 iRowid,
5568   Fts5Buffer *pBuf,               /* Use this buffer for space if required */
5569   u8 **pa, int *pn
5570 ){
5571   Fts5PoslistReader aStatic[4];
5572   Fts5PoslistReader *aIter = aStatic;
5573   int nIter = 0;
5574   int nAlloc = 4;
5575   int rc = SQLITE_OK;
5576   Fts5ExprTerm *p;
5577 
5578   assert( pTerm->pSynonym );
5579   for(p=pTerm; p; p=p->pSynonym){
5580     Fts5IndexIter *pIter = p->pIter;
5581     if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){
5582       if( pIter->nData==0 ) continue;
5583       if( nIter==nAlloc ){
5584         sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2;
5585         Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
5586         if( aNew==0 ){
5587           rc = SQLITE_NOMEM;
5588           goto synonym_poslist_out;
5589         }
5590         memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter);
5591         nAlloc = nAlloc*2;
5592         if( aIter!=aStatic ) sqlite3_free(aIter);
5593         aIter = aNew;
5594       }
5595       sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]);
5596       assert( aIter[nIter].bEof==0 );
5597       nIter++;
5598     }
5599   }
5600 
5601   if( nIter==1 ){
5602     *pa = (u8*)aIter[0].a;
5603     *pn = aIter[0].n;
5604   }else{
5605     Fts5PoslistWriter writer = {0};
5606     i64 iPrev = -1;
5607     fts5BufferZero(pBuf);
5608     while( 1 ){
5609       int i;
5610       i64 iMin = FTS5_LARGEST_INT64;
5611       for(i=0; i<nIter; i++){
5612         if( aIter[i].bEof==0 ){
5613           if( aIter[i].iPos==iPrev ){
5614             if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue;
5615           }
5616           if( aIter[i].iPos<iMin ){
5617             iMin = aIter[i].iPos;
5618           }
5619         }
5620       }
5621       if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break;
5622       rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin);
5623       iPrev = iMin;
5624     }
5625     if( rc==SQLITE_OK ){
5626       *pa = pBuf->p;
5627       *pn = pBuf->n;
5628     }
5629   }
5630 
5631  synonym_poslist_out:
5632   if( aIter!=aStatic ) sqlite3_free(aIter);
5633   return rc;
5634 }
5635 
5636 
5637 /*
5638 ** All individual term iterators in pPhrase are guaranteed to be valid and
5639 ** pointing to the same rowid when this function is called. This function
5640 ** checks if the current rowid really is a match, and if so populates
5641 ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch
5642 ** is set to true if this is really a match, or false otherwise.
5643 **
5644 ** SQLITE_OK is returned if an error occurs, or an SQLite error code
5645 ** otherwise. It is not considered an error code if the current rowid is
5646 ** not a match.
5647 */
5648 static int fts5ExprPhraseIsMatch(
5649   Fts5ExprNode *pNode,            /* Node pPhrase belongs to */
5650   Fts5ExprPhrase *pPhrase,        /* Phrase object to initialize */
5651   int *pbMatch                    /* OUT: Set to true if really a match */
5652 ){
5653   Fts5PoslistWriter writer = {0};
5654   Fts5PoslistReader aStatic[4];
5655   Fts5PoslistReader *aIter = aStatic;
5656   int i;
5657   int rc = SQLITE_OK;
5658   int bFirst = pPhrase->aTerm[0].bFirst;
5659 
5660   fts5BufferZero(&pPhrase->poslist);
5661 
5662   /* If the aStatic[] array is not large enough, allocate a large array
5663   ** using sqlite3_malloc(). This approach could be improved upon. */
5664   if( pPhrase->nTerm>ArraySize(aStatic) ){
5665     sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm;
5666     aIter = (Fts5PoslistReader*)sqlite3_malloc64(nByte);
5667     if( !aIter ) return SQLITE_NOMEM;
5668   }
5669   memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm);
5670 
5671   /* Initialize a term iterator for each term in the phrase */
5672   for(i=0; i<pPhrase->nTerm; i++){
5673     Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
5674     int n = 0;
5675     int bFlag = 0;
5676     u8 *a = 0;
5677     if( pTerm->pSynonym ){
5678       Fts5Buffer buf = {0, 0, 0};
5679       rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n);
5680       if( rc ){
5681         sqlite3_free(a);
5682         goto ismatch_out;
5683       }
5684       if( a==buf.p ) bFlag = 1;
5685     }else{
5686       a = (u8*)pTerm->pIter->pData;
5687       n = pTerm->pIter->nData;
5688     }
5689     sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
5690     aIter[i].bFlag = (u8)bFlag;
5691     if( aIter[i].bEof ) goto ismatch_out;
5692   }
5693 
5694   while( 1 ){
5695     int bMatch;
5696     i64 iPos = aIter[0].iPos;
5697     do {
5698       bMatch = 1;
5699       for(i=0; i<pPhrase->nTerm; i++){
5700         Fts5PoslistReader *pPos = &aIter[i];
5701         i64 iAdj = iPos + i;
5702         if( pPos->iPos!=iAdj ){
5703           bMatch = 0;
5704           while( pPos->iPos<iAdj ){
5705             if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out;
5706           }
5707           if( pPos->iPos>iAdj ) iPos = pPos->iPos-i;
5708         }
5709       }
5710     }while( bMatch==0 );
5711 
5712     /* Append position iPos to the output */
5713     if( bFirst==0 || FTS5_POS2OFFSET(iPos)==0 ){
5714       rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos);
5715       if( rc!=SQLITE_OK ) goto ismatch_out;
5716     }
5717 
5718     for(i=0; i<pPhrase->nTerm; i++){
5719       if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out;
5720     }
5721   }
5722 
5723  ismatch_out:
5724   *pbMatch = (pPhrase->poslist.n>0);
5725   for(i=0; i<pPhrase->nTerm; i++){
5726     if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a);
5727   }
5728   if( aIter!=aStatic ) sqlite3_free(aIter);
5729   return rc;
5730 }
5731 
/*
** A position-list reader that exposes both the current position (iPos)
** and the position that follows it (iLookahead). See
** fts5LookaheadReaderInit() and fts5LookaheadReaderNext().
*/
typedef struct Fts5LookaheadReader Fts5LookaheadReader;
struct Fts5LookaheadReader {
  const u8 *a;                    /* Buffer containing position list */
  int n;                          /* Size of buffer a[] in bytes */
  int i;                          /* Current offset in position list */
  i64 iPos;                       /* Current position */
  i64 iLookahead;                 /* Next position */
};

/* Value stored in iLookahead (and, one step later, in iPos) once the end
** of the position list has been reached. */
#define FTS5_LOOKAHEAD_EOF (((i64)1) << 62)
5742 
5743 static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){
5744   p->iPos = p->iLookahead;
5745   if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){
5746     p->iLookahead = FTS5_LOOKAHEAD_EOF;
5747   }
5748   return (p->iPos==FTS5_LOOKAHEAD_EOF);
5749 }
5750 
5751 static int fts5LookaheadReaderInit(
5752   const u8 *a, int n,             /* Buffer to read position list from */
5753   Fts5LookaheadReader *p          /* Iterator object to initialize */
5754 ){
5755   memset(p, 0, sizeof(Fts5LookaheadReader));
5756   p->a = a;
5757   p->n = n;
5758   fts5LookaheadReaderNext(p);
5759   return fts5LookaheadReaderNext(p);
5760 }
5761 
/*
** State kept for each phrase by fts5ExprNearIsMatch() while it trims the
** phrase's position list down to those entries that satisfy the NEAR
** constraint. The reader and the writer operate on the same buffer.
*/
typedef struct Fts5NearTrimmer Fts5NearTrimmer;
struct Fts5NearTrimmer {
  Fts5LookaheadReader reader;     /* Input iterator */
  Fts5PoslistWriter writer;       /* Writer context */
  Fts5Buffer *pOut;               /* Output poslist */
};
5768 
5769 /*
5770 ** The near-set object passed as the first argument contains more than
5771 ** one phrase. All phrases currently point to the same row. The
5772 ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function
5773 ** tests if the current row contains instances of each phrase sufficiently
5774 ** close together to meet the NEAR constraint. Non-zero is returned if it
5775 ** does, or zero otherwise.
5776 **
5777 ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this
5778 ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM)
5779 ** occurs within this function (*pRc) is set accordingly before returning.
5780 ** The return value is undefined in both these cases.
5781 **
5782 ** If no error occurs and non-zero (a match) is returned, the position-list
5783 ** of each phrase object is edited to contain only those entries that
5784 ** meet the constraint before returning.
5785 */
5786 static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){
5787   Fts5NearTrimmer aStatic[4];
5788   Fts5NearTrimmer *a = aStatic;
5789   Fts5ExprPhrase **apPhrase = pNear->apPhrase;
5790 
5791   int i;
5792   int rc = *pRc;
5793   int bMatch;
5794 
5795   assert( pNear->nPhrase>1 );
5796 
5797   /* If the aStatic[] array is not large enough, allocate a large array
5798   ** using sqlite3_malloc(). This approach could be improved upon. */
5799   if( pNear->nPhrase>ArraySize(aStatic) ){
5800     sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase;
5801     a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte);
5802   }else{
5803     memset(aStatic, 0, sizeof(aStatic));
5804   }
5805   if( rc!=SQLITE_OK ){
5806     *pRc = rc;
5807     return 0;
5808   }
5809 
5810   /* Initialize a lookahead iterator for each phrase. After passing the
5811   ** buffer and buffer size to the lookaside-reader init function, zero
5812   ** the phrase poslist buffer. The new poslist for the phrase (containing
5813   ** the same entries as the original with some entries removed on account
5814   ** of the NEAR constraint) is written over the original even as it is
5815   ** being read. This is safe as the entries for the new poslist are a
5816   ** subset of the old, so it is not possible for data yet to be read to
5817   ** be overwritten.  */
5818   for(i=0; i<pNear->nPhrase; i++){
5819     Fts5Buffer *pPoslist = &apPhrase[i]->poslist;
5820     fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader);
5821     pPoslist->n = 0;
5822     a[i].pOut = pPoslist;
5823   }
5824 
5825   while( 1 ){
5826     int iAdv;
5827     i64 iMin;
5828     i64 iMax;
5829 
5830     /* This block advances the phrase iterators until they point to a set of
5831     ** entries that together comprise a match.  */
5832     iMax = a[0].reader.iPos;
5833     do {
5834       bMatch = 1;
5835       for(i=0; i<pNear->nPhrase; i++){
5836         Fts5LookaheadReader *pPos = &a[i].reader;
5837         iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear;
5838         if( pPos->iPos<iMin || pPos->iPos>iMax ){
5839           bMatch = 0;
5840           while( pPos->iPos<iMin ){
5841             if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out;
5842           }
5843           if( pPos->iPos>iMax ) iMax = pPos->iPos;
5844         }
5845       }
5846     }while( bMatch==0 );
5847 
5848     /* Add an entry to each output position list */
5849     for(i=0; i<pNear->nPhrase; i++){
5850       i64 iPos = a[i].reader.iPos;
5851       Fts5PoslistWriter *pWriter = &a[i].writer;
5852       if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){
5853         sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos);
5854       }
5855     }
5856 
5857     iAdv = 0;
5858     iMin = a[0].reader.iLookahead;
5859     for(i=0; i<pNear->nPhrase; i++){
5860       if( a[i].reader.iLookahead < iMin ){
5861         iMin = a[i].reader.iLookahead;
5862         iAdv = i;
5863       }
5864     }
5865     if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out;
5866   }
5867 
5868   ismatch_out: {
5869     int bRet = a[0].pOut->n>0;
5870     *pRc = rc;
5871     if( a!=aStatic ) sqlite3_free(a);
5872     return bRet;
5873   }
5874 }
5875 
5876 /*
5877 ** Advance iterator pIter until it points to a value equal to or laster
5878 ** than the initial value of *piLast. If this means the iterator points
5879 ** to a value laster than *piLast, update *piLast to the new lastest value.
5880 **
5881 ** If the iterator reaches EOF, set *pbEof to true before returning. If
5882 ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc
5883 ** are set, return a non-zero value. Otherwise, return zero.
5884 */
5885 static int fts5ExprAdvanceto(
5886   Fts5IndexIter *pIter,           /* Iterator to advance */
5887   int bDesc,                      /* True if iterator is "rowid DESC" */
5888   i64 *piLast,                    /* IN/OUT: Lastest rowid seen so far */
5889   int *pRc,                       /* OUT: Error code */
5890   int *pbEof                      /* OUT: Set to true if EOF */
5891 ){
5892   i64 iLast = *piLast;
5893   i64 iRowid;
5894 
5895   iRowid = pIter->iRowid;
5896   if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
5897     int rc = sqlite3Fts5IterNextFrom(pIter, iLast);
5898     if( rc || sqlite3Fts5IterEof(pIter) ){
5899       *pRc = rc;
5900       *pbEof = 1;
5901       return 1;
5902     }
5903     iRowid = pIter->iRowid;
5904     assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) );
5905   }
5906   *piLast = iRowid;
5907 
5908   return 0;
5909 }
5910 
5911 static int fts5ExprSynonymAdvanceto(
5912   Fts5ExprTerm *pTerm,            /* Term iterator to advance */
5913   int bDesc,                      /* True if iterator is "rowid DESC" */
5914   i64 *piLast,                    /* IN/OUT: Lastest rowid seen so far */
5915   int *pRc                        /* OUT: Error code */
5916 ){
5917   int rc = SQLITE_OK;
5918   i64 iLast = *piLast;
5919   Fts5ExprTerm *p;
5920   int bEof = 0;
5921 
5922   for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){
5923     if( sqlite3Fts5IterEof(p->pIter)==0 ){
5924       i64 iRowid = p->pIter->iRowid;
5925       if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){
5926         rc = sqlite3Fts5IterNextFrom(p->pIter, iLast);
5927       }
5928     }
5929   }
5930 
5931   if( rc!=SQLITE_OK ){
5932     *pRc = rc;
5933     bEof = 1;
5934   }else{
5935     *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof);
5936   }
5937   return bEof;
5938 }
5939 
5940 
5941 static int fts5ExprNearTest(
5942   int *pRc,
5943   Fts5Expr *pExpr,                /* Expression that pNear is a part of */
5944   Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_STRING) */
5945 ){
5946   Fts5ExprNearset *pNear = pNode->pNear;
5947   int rc = *pRc;
5948 
5949   if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){
5950     Fts5ExprTerm *pTerm;
5951     Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
5952     pPhrase->poslist.n = 0;
5953     for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
5954       Fts5IndexIter *pIter = pTerm->pIter;
5955       if( sqlite3Fts5IterEof(pIter)==0 ){
5956         if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){
5957           pPhrase->poslist.n = 1;
5958         }
5959       }
5960     }
5961     return pPhrase->poslist.n;
5962   }else{
5963     int i;
5964 
5965     /* Check that each phrase in the nearset matches the current row.
5966     ** Populate the pPhrase->poslist buffers at the same time. If any
5967     ** phrase is not a match, break out of the loop early.  */
5968     for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){
5969       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
5970       if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym
5971        || pNear->pColset || pPhrase->aTerm[0].bFirst
5972       ){
5973         int bMatch = 0;
5974         rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch);
5975         if( bMatch==0 ) break;
5976       }else{
5977         Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
5978         fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData);
5979       }
5980     }
5981 
5982     *pRc = rc;
5983     if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){
5984       return 1;
5985     }
5986     return 0;
5987   }
5988 }
5989 
5990 
5991 /*
5992 ** Initialize all term iterators in the pNear object. If any term is found
5993 ** to match no documents at all, return immediately without initializing any
5994 ** further iterators.
5995 **
5996 ** If an error occurs, return an SQLite error code. Otherwise, return
5997 ** SQLITE_OK. It is not considered an error if some term matches zero
5998 ** documents.
5999 */
6000 static int fts5ExprNearInitAll(
6001   Fts5Expr *pExpr,
6002   Fts5ExprNode *pNode
6003 ){
6004   Fts5ExprNearset *pNear = pNode->pNear;
6005   int i;
6006 
6007   assert( pNode->bNomatch==0 );
6008   for(i=0; i<pNear->nPhrase; i++){
6009     Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6010     if( pPhrase->nTerm==0 ){
6011       pNode->bEof = 1;
6012       return SQLITE_OK;
6013     }else{
6014       int j;
6015       for(j=0; j<pPhrase->nTerm; j++){
6016         Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
6017         Fts5ExprTerm *p;
6018         int bHit = 0;
6019 
6020         for(p=pTerm; p; p=p->pSynonym){
6021           int rc;
6022           if( p->pIter ){
6023             sqlite3Fts5IterClose(p->pIter);
6024             p->pIter = 0;
6025           }
6026           rc = sqlite3Fts5IndexQuery(
6027               pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm),
6028               (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) |
6029               (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0),
6030               pNear->pColset,
6031               &p->pIter
6032           );
6033           assert( (rc==SQLITE_OK)==(p->pIter!=0) );
6034           if( rc!=SQLITE_OK ) return rc;
6035           if( 0==sqlite3Fts5IterEof(p->pIter) ){
6036             bHit = 1;
6037           }
6038         }
6039 
6040         if( bHit==0 ){
6041           pNode->bEof = 1;
6042           return SQLITE_OK;
6043         }
6044       }
6045     }
6046   }
6047 
6048   pNode->bEof = 0;
6049   return SQLITE_OK;
6050 }
6051 
6052 /*
6053 ** If pExpr is an ASC iterator, this function returns a value with the
6054 ** same sign as:
6055 **
6056 **   (iLhs - iRhs)
6057 **
6058 ** Otherwise, if this is a DESC iterator, the opposite is returned:
6059 **
6060 **   (iRhs - iLhs)
6061 */
6062 static int fts5RowidCmp(
6063   Fts5Expr *pExpr,
6064   i64 iLhs,
6065   i64 iRhs
6066 ){
6067   assert( pExpr->bDesc==0 || pExpr->bDesc==1 );
6068   if( pExpr->bDesc==0 ){
6069     if( iLhs<iRhs ) return -1;
6070     return (iLhs > iRhs);
6071   }else{
6072     if( iLhs>iRhs ) return -1;
6073     return (iLhs < iRhs);
6074   }
6075 }
6076 
6077 static void fts5ExprSetEof(Fts5ExprNode *pNode){
6078   int i;
6079   pNode->bEof = 1;
6080   pNode->bNomatch = 0;
6081   for(i=0; i<pNode->nChild; i++){
6082     fts5ExprSetEof(pNode->apChild[i]);
6083   }
6084 }
6085 
6086 static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){
6087   if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
6088     Fts5ExprNearset *pNear = pNode->pNear;
6089     int i;
6090     for(i=0; i<pNear->nPhrase; i++){
6091       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
6092       pPhrase->poslist.n = 0;
6093     }
6094   }else{
6095     int i;
6096     for(i=0; i<pNode->nChild; i++){
6097       fts5ExprNodeZeroPoslist(pNode->apChild[i]);
6098     }
6099   }
6100 }
6101 
6102 
6103 
6104 /*
6105 ** Compare the values currently indicated by the two nodes as follows:
6106 **
6107 **    res = (*p1) - (*p2)
6108 **
6109 ** Nodes that point to values that come later in the iteration order are
6110 ** considered to be larger. Nodes at EOF are the largest of all.
6111 **
6112 ** This means that if the iteration order is ASC, then numerically larger
6113 ** rowids are considered larger. Or if it is the default DESC, numerically
6114 ** smaller rowids are larger.
6115 */
6116 static int fts5NodeCompare(
6117   Fts5Expr *pExpr,
6118   Fts5ExprNode *p1,
6119   Fts5ExprNode *p2
6120 ){
6121   if( p2->bEof ) return -1;
6122   if( p1->bEof ) return +1;
6123   return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid);
6124 }
6125 
/*
** All individual term iterators in pNode->pNear are guaranteed to be valid
** when this function is called. This function checks if all term iterators
** point to the same rowid, and if not, advances them until they do.
** If an EOF is reached before this happens, pNode->bEof is set to true
** before returning.
**
** Once all iterators agree on a rowid, that rowid is stored in
** pNode->iRowid and fts5ExprNearTest() is used to test the row. If the
** test fails without an error, pNode->bNomatch is set.
**
** SQLITE_OK is returned if successful, or an SQLite error code if an
** error occurs. It is not considered an error if an iterator reaches
** EOF.
*/
static int fts5ExprNodeTest_STRING(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode             /* FTS5_STRING node to test */
){
  Fts5ExprNearset *pNear = pNode->pNear;
  Fts5ExprPhrase *pLeft = pNear->apPhrase[0];
  int rc = SQLITE_OK;
  i64 iLast;                      /* Lastest rowid any iterator points to */
  int i, j;                       /* Phrase and token index, respectively */
  int bMatch;                     /* True if all terms are at the same rowid */
  const int bDesc = pExpr->bDesc;

  /* Check that this node should not be FTS5_TERM */
  assert( pNear->nPhrase>1
       || pNear->apPhrase[0]->nTerm>1
       || pNear->apPhrase[0]->aTerm[0].pSynonym
       || pNear->apPhrase[0]->aTerm[0].bFirst
  );

  /* Initialize iLast, the "lastest" rowid any iterator points to. If the
  ** iterator skips through rowids in the default ascending order, this means
  ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it
  ** means the minimum rowid.  */
  if( pLeft->aTerm[0].pSynonym ){
    iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0);
  }else{
    iLast = pLeft->aTerm[0].pIter->iRowid;
  }

  /* Keep sweeping over all terms until a complete pass finds every one of
  ** them already at iLast. Each time a term has to be advanced iLast may
  ** move, so another pass is required.  */
  do {
    bMatch = 1;
    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      for(j=0; j<pPhrase->nTerm; j++){
        Fts5ExprTerm *pTerm = &pPhrase->aTerm[j];
        if( pTerm->pSynonym ){
          /* Term with synonyms: compare and advance the whole set. */
          i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0);
          if( iRowid==iLast ) continue;
          bMatch = 0;
          if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){
            pNode->bNomatch = 0;
            pNode->bEof = 1;
            return rc;
          }
        }else{
          /* Plain term. An iterator that is already at EOF is skipped. */
          Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter;
          if( pIter->iRowid==iLast || pIter->bEof ) continue;
          bMatch = 0;
          /* On EOF or error fts5ExprAdvanceto() sets pNode->bEof itself */
          if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){
            return rc;
          }
        }
      }
    }
  }while( bMatch==0 );

  pNode->iRowid = iLast;
  pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode)) && rc==SQLITE_OK);
  assert( pNode->bEof==0 || pNode->bNomatch==0 );

  return rc;
}
6199 
/*
** Advance the first term iterator in the first phrase of pNode->pNear
** (or, if that term has synonyms, the relevant synonym iterators). Set
** pNode->bEof to true if it reaches EOF or if an error occurs.
**
** If bFromValid is non-zero, sqlite3Fts5IterNextFrom() is used with iFrom
** instead of a plain sqlite3Fts5IterNext().
**
** If the node is not at EOF afterwards, fts5ExprNodeTest_STRING() is
** invoked to line up the remaining term iterators and test the new row.
**
** Return SQLITE_OK if successful, or an SQLite error code if an error
** occurs.
*/
static int fts5ExprNodeNext_STRING(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode,            /* FTS5_STRING or FTS5_TERM node */
  int bFromValid,                 /* True if iFrom is valid */
  i64 iFrom                       /* Rowid to advance to, if bFromValid */
){
  Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0];
  int rc = SQLITE_OK;

  pNode->bNomatch = 0;
  if( pTerm->pSynonym ){
    int bEof = 1;                 /* Cleared if any synonym is not at EOF */
    Fts5ExprTerm *p;

    /* Find the firstest rowid any synonym points to. */
    i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0);

    /* Advance each iterator that currently points to iRowid. Or, if iFrom
    ** is valid - each iterator that points to a rowid before iFrom.  */
    for(p=pTerm; p; p=p->pSynonym){
      if( sqlite3Fts5IterEof(p->pIter)==0 ){
        i64 ii = p->pIter->iRowid;
        /* (ii>iFrom)==bDesc is true when ii precedes iFrom in the
        ** iteration order (given that ii!=iFrom).  */
        if( ii==iRowid
         || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc)
        ){
          if( bFromValid ){
            rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom);
          }else{
            rc = sqlite3Fts5IterNext(p->pIter);
          }
          if( rc!=SQLITE_OK ) break;
          if( sqlite3Fts5IterEof(p->pIter)==0 ){
            bEof = 0;
          }
        }else{
          /* Not advanced, and not at EOF */
          bEof = 0;
        }
      }
    }

    /* Set the EOF flag if either all synonym iterators are at EOF or an
    ** error has occurred.  */
    pNode->bEof = (rc || bEof);
  }else{
    Fts5IndexIter *pIter = pTerm->pIter;

    assert( Fts5NodeIsString(pNode) );
    if( bFromValid ){
      rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
    }else{
      rc = sqlite3Fts5IterNext(pIter);
    }

    pNode->bEof = (rc || sqlite3Fts5IterEof(pIter));
  }

  if( pNode->bEof==0 ){
    assert( rc==SQLITE_OK );
    rc = fts5ExprNodeTest_STRING(pExpr, pNode);
  }

  return rc;
}
6270 
6271 
6272 static int fts5ExprNodeTest_TERM(
6273   Fts5Expr *pExpr,                /* Expression that pNear is a part of */
6274   Fts5ExprNode *pNode             /* The "NEAR" node (FTS5_TERM) */
6275 ){
6276   /* As this "NEAR" object is actually a single phrase that consists
6277   ** of a single term only, grab pointers into the poslist managed by the
6278   ** fts5_index.c iterator object. This is much faster than synthesizing
6279   ** a new poslist the way we have to for more complicated phrase or NEAR
6280   ** expressions.  */
6281   Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0];
6282   Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter;
6283 
6284   assert( pNode->eType==FTS5_TERM );
6285   assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 );
6286   assert( pPhrase->aTerm[0].pSynonym==0 );
6287 
6288   pPhrase->poslist.n = pIter->nData;
6289   if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){
6290     pPhrase->poslist.p = (u8*)pIter->pData;
6291   }
6292   pNode->iRowid = pIter->iRowid;
6293   pNode->bNomatch = (pPhrase->poslist.n==0);
6294   return SQLITE_OK;
6295 }
6296 
6297 /*
6298 ** xNext() method for a node of type FTS5_TERM.
6299 */
6300 static int fts5ExprNodeNext_TERM(
6301   Fts5Expr *pExpr,
6302   Fts5ExprNode *pNode,
6303   int bFromValid,
6304   i64 iFrom
6305 ){
6306   int rc;
6307   Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter;
6308 
6309   assert( pNode->bEof==0 );
6310   if( bFromValid ){
6311     rc = sqlite3Fts5IterNextFrom(pIter, iFrom);
6312   }else{
6313     rc = sqlite3Fts5IterNext(pIter);
6314   }
6315   if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){
6316     rc = fts5ExprNodeTest_TERM(pExpr, pNode);
6317   }else{
6318     pNode->bEof = 1;
6319     pNode->bNomatch = 0;
6320   }
6321   return rc;
6322 }
6323 
6324 static void fts5ExprNodeTest_OR(
6325   Fts5Expr *pExpr,                /* Expression of which pNode is a part */
6326   Fts5ExprNode *pNode             /* Expression node to test */
6327 ){
6328   Fts5ExprNode *pNext = pNode->apChild[0];
6329   int i;
6330 
6331   for(i=1; i<pNode->nChild; i++){
6332     Fts5ExprNode *pChild = pNode->apChild[i];
6333     int cmp = fts5NodeCompare(pExpr, pNext, pChild);
6334     if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){
6335       pNext = pChild;
6336     }
6337   }
6338   pNode->iRowid = pNext->iRowid;
6339   pNode->bEof = pNext->bEof;
6340   pNode->bNomatch = pNext->bNomatch;
6341 }
6342 
6343 static int fts5ExprNodeNext_OR(
6344   Fts5Expr *pExpr,
6345   Fts5ExprNode *pNode,
6346   int bFromValid,
6347   i64 iFrom
6348 ){
6349   int i;
6350   i64 iLast = pNode->iRowid;
6351 
6352   for(i=0; i<pNode->nChild; i++){
6353     Fts5ExprNode *p1 = pNode->apChild[i];
6354     assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 );
6355     if( p1->bEof==0 ){
6356       if( (p1->iRowid==iLast)
6357        || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0)
6358       ){
6359         int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom);
6360         if( rc!=SQLITE_OK ){
6361           pNode->bNomatch = 0;
6362           return rc;
6363         }
6364       }
6365     }
6366   }
6367 
6368   fts5ExprNodeTest_OR(pExpr, pNode);
6369   return SQLITE_OK;
6370 }
6371 
/*
** Argument pAnd is an FTS5_AND node that is not at EOF.
**
** Starting from pAnd->iRowid, advance the child nodes until they all
** point to the same rowid, or until one of them reaches EOF. In the
** latter case fts5ExprSetEof() is invoked on pAnd. Otherwise the common
** rowid is stored in pAnd->iRowid, and pAnd->bNomatch is set if any child
** had its bNomatch flag set on the final pass.
**
** If advancing a child fails, pAnd->bNomatch is cleared and the error code
** is returned. Otherwise SQLITE_OK is returned.
*/
static int fts5ExprNodeTest_AND(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pAnd              /* FTS5_AND node to advance */
){
  int iChild;
  i64 iLast = pAnd->iRowid;       /* Lastest rowid seen so far */
  int rc = SQLITE_OK;
  int bMatch;                     /* True once a full pass left iLast alone */

  assert( pAnd->bEof==0 );
  do {
    /* bNomatch is recomputed from scratch on every pass */
    pAnd->bNomatch = 0;
    bMatch = 1;
    for(iChild=0; iChild<pAnd->nChild; iChild++){
      Fts5ExprNode *pChild = pAnd->apChild[iChild];
      int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid);
      if( cmp>0 ){
        /* Advance pChild until it points to iLast or laster */
        rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast);
        if( rc!=SQLITE_OK ){
          pAnd->bNomatch = 0;
          return rc;
        }
      }

      /* If the child node is now at EOF, so is the parent AND node. Otherwise,
      ** the child node is guaranteed to have advanced at least as far as
      ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the
      ** new lastest rowid seen so far.  */
      assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 );
      if( pChild->bEof ){
        fts5ExprSetEof(pAnd);
        /* Setting bMatch terminates the outer do-while loop as well */
        bMatch = 1;
        break;
      }else if( iLast!=pChild->iRowid ){
        bMatch = 0;
        iLast = pChild->iRowid;
      }

      if( pChild->bNomatch ){
        pAnd->bNomatch = 1;
      }
    }
  }while( bMatch==0 );

  /* Unless pAnd is the root of the expression, a non-matching AND node
  ** has all position lists beneath it zeroed.  */
  if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){
    fts5ExprNodeZeroPoslist(pAnd);
  }
  pAnd->iRowid = iLast;
  return SQLITE_OK;
}
6426 
6427 static int fts5ExprNodeNext_AND(
6428   Fts5Expr *pExpr,
6429   Fts5ExprNode *pNode,
6430   int bFromValid,
6431   i64 iFrom
6432 ){
6433   int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
6434   if( rc==SQLITE_OK ){
6435     rc = fts5ExprNodeTest_AND(pExpr, pNode);
6436   }else{
6437     pNode->bNomatch = 0;
6438   }
6439   return rc;
6440 }
6441 
/*
** Argument pNode is an FTS5_NOT node with exactly two children. The
** first child (p1) supplies the candidate rows, the second (p2) the rows
** to exclude.
**
** While p1 is not at EOF, p2 is advanced (if it is behind) to the rowid
** p1 points to. If p2 does not end up on the same rowid as p1, or if p2
** has its bNomatch flag set, p1 is accepted and the loop ends. Otherwise
** p1 is advanced to its next row and the process repeats.
**
** On return the bEof, bNomatch and iRowid fields of pNode are copies of
** those of p1. If p1 is at EOF, the position lists beneath p2 are zeroed.
**
** Return SQLITE_OK if successful, or the error code returned by a failed
** fts5ExprNodeNext() call.
*/
static int fts5ExprNodeTest_NOT(
  Fts5Expr *pExpr,                /* Expression pPhrase belongs to */
  Fts5ExprNode *pNode             /* FTS5_NOT node to advance */
){
  int rc = SQLITE_OK;
  Fts5ExprNode *p1 = pNode->apChild[0];
  Fts5ExprNode *p2 = pNode->apChild[1];
  assert( pNode->nChild==2 );

  while( rc==SQLITE_OK && p1->bEof==0 ){
    int cmp = fts5NodeCompare(pExpr, p1, p2);
    if( cmp>0 ){
      /* p2 is behind p1. Move it up to p1->iRowid or beyond. */
      rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid);
      cmp = fts5NodeCompare(pExpr, p1, p2);
    }
    assert( rc!=SQLITE_OK || cmp<=0 );
    /* Stop unless p2 really matches the row p1 is on */
    if( cmp || p2->bNomatch ) break;
    rc = fts5ExprNodeNext(pExpr, p1, 0, 0);
  }
  pNode->bEof = p1->bEof;
  pNode->bNomatch = p1->bNomatch;
  pNode->iRowid = p1->iRowid;
  if( p1->bEof ){
    fts5ExprNodeZeroPoslist(p2);
  }
  return rc;
}
6469 
6470 static int fts5ExprNodeNext_NOT(
6471   Fts5Expr *pExpr,
6472   Fts5ExprNode *pNode,
6473   int bFromValid,
6474   i64 iFrom
6475 ){
6476   int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom);
6477   if( rc==SQLITE_OK ){
6478     rc = fts5ExprNodeTest_NOT(pExpr, pNode);
6479   }
6480   if( rc!=SQLITE_OK ){
6481     pNode->bNomatch = 0;
6482   }
6483   return rc;
6484 }
6485 
6486 /*
6487 ** If pNode currently points to a match, this function returns SQLITE_OK
6488 ** without modifying it. Otherwise, pNode is advanced until it does point
6489 ** to a match or EOF is reached.
6490 */
6491 static int fts5ExprNodeTest(
6492   Fts5Expr *pExpr,                /* Expression of which pNode is a part */
6493   Fts5ExprNode *pNode             /* Expression node to test */
6494 ){
6495   int rc = SQLITE_OK;
6496   if( pNode->bEof==0 ){
6497     switch( pNode->eType ){
6498 
6499       case FTS5_STRING: {
6500         rc = fts5ExprNodeTest_STRING(pExpr, pNode);
6501         break;
6502       }
6503 
6504       case FTS5_TERM: {
6505         rc = fts5ExprNodeTest_TERM(pExpr, pNode);
6506         break;
6507       }
6508 
6509       case FTS5_AND: {
6510         rc = fts5ExprNodeTest_AND(pExpr, pNode);
6511         break;
6512       }
6513 
6514       case FTS5_OR: {
6515         fts5ExprNodeTest_OR(pExpr, pNode);
6516         break;
6517       }
6518 
6519       default: assert( pNode->eType==FTS5_NOT ); {
6520         rc = fts5ExprNodeTest_NOT(pExpr, pNode);
6521         break;
6522       }
6523     }
6524   }
6525   return rc;
6526 }
6527 
6528 
6529 /*
6530 ** Set node pNode, which is part of expression pExpr, to point to the first
6531 ** match. If there are no matches, set the Node.bEof flag to indicate EOF.
6532 **
6533 ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise.
6534 ** It is not an error if there are no matches.
6535 */
6536 static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){
6537   int rc = SQLITE_OK;
6538   pNode->bEof = 0;
6539   pNode->bNomatch = 0;
6540 
6541   if( Fts5NodeIsString(pNode) ){
6542     /* Initialize all term iterators in the NEAR object. */
6543     rc = fts5ExprNearInitAll(pExpr, pNode);
6544   }else if( pNode->xNext==0 ){
6545     pNode->bEof = 1;
6546   }else{
6547     int i;
6548     int nEof = 0;
6549     for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){
6550       Fts5ExprNode *pChild = pNode->apChild[i];
6551       rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]);
6552       assert( pChild->bEof==0 || pChild->bEof==1 );
6553       nEof += pChild->bEof;
6554     }
6555     pNode->iRowid = pNode->apChild[0]->iRowid;
6556 
6557     switch( pNode->eType ){
6558       case FTS5_AND:
6559         if( nEof>0 ) fts5ExprSetEof(pNode);
6560         break;
6561 
6562       case FTS5_OR:
6563         if( pNode->nChild==nEof ) fts5ExprSetEof(pNode);
6564         break;
6565 
6566       default:
6567         assert( pNode->eType==FTS5_NOT );
6568         pNode->bEof = pNode->apChild[0]->bEof;
6569         break;
6570     }
6571   }
6572 
6573   if( rc==SQLITE_OK ){
6574     rc = fts5ExprNodeTest(pExpr, pNode);
6575   }
6576   return rc;
6577 }
6578 
6579 
6580 /*
6581 ** Begin iterating through the set of documents in index pIdx matched by
6582 ** the MATCH expression passed as the first argument. If the "bDesc"
6583 ** parameter is passed a non-zero value, iteration is in descending rowid
6584 ** order. Or, if it is zero, in ascending order.
6585 **
6586 ** If iterating in ascending rowid order (bDesc==0), the first document
6587 ** visited is that with the smallest rowid that is larger than or equal
6588 ** to parameter iFirst. Or, if iterating in ascending order (bDesc==1),
6589 ** then the first document visited must have a rowid smaller than or
6590 ** equal to iFirst.
6591 **
6592 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
6593 ** is not considered an error if the query does not match any documents.
6594 */
6595 static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){
6596   Fts5ExprNode *pRoot = p->pRoot;
6597   int rc;                         /* Return code */
6598 
6599   p->pIndex = pIdx;
6600   p->bDesc = bDesc;
6601   rc = fts5ExprNodeFirst(p, pRoot);
6602 
6603   /* If not at EOF but the current rowid occurs earlier than iFirst in
6604   ** the iteration order, move to document iFirst or later. */
6605   if( rc==SQLITE_OK
6606    && 0==pRoot->bEof
6607    && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0
6608   ){
6609     rc = fts5ExprNodeNext(p, pRoot, 1, iFirst);
6610   }
6611 
6612   /* If the iterator is not at a real match, skip forward until it is. */
6613   while( pRoot->bNomatch && rc==SQLITE_OK ){
6614     assert( pRoot->bEof==0 );
6615     rc = fts5ExprNodeNext(p, pRoot, 0, 0);
6616   }
6617   return rc;
6618 }
6619 
6620 /*
6621 ** Move to the next document
6622 **
6623 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It
6624 ** is not considered an error if the query does not match any documents.
6625 */
6626 static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){
6627   int rc;
6628   Fts5ExprNode *pRoot = p->pRoot;
6629   assert( pRoot->bEof==0 && pRoot->bNomatch==0 );
6630   do {
6631     rc = fts5ExprNodeNext(p, pRoot, 0, 0);
6632     assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) );
6633   }while( pRoot->bNomatch );
6634   if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){
6635     pRoot->bEof = 1;
6636   }
6637   return rc;
6638 }
6639 
/*
** Return the bEof flag of the root node of expression p. The flag is
** non-zero once iteration has reached EOF.
*/
static int sqlite3Fts5ExprEof(Fts5Expr *p){
  return p->pRoot->bEof;
}
6643 
/*
** Return the rowid stored in the root node of expression p, which is the
** rowid the expression currently points to.
*/
static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){
  return p->pRoot->iRowid;
}
6647 
6648 static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){
6649   int rc = SQLITE_OK;
6650   *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n);
6651   return rc;
6652 }
6653 
6654 /*
6655 ** Free the phrase object passed as the only argument.
6656 */
6657 static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){
6658   if( pPhrase ){
6659     int i;
6660     for(i=0; i<pPhrase->nTerm; i++){
6661       Fts5ExprTerm *pSyn;
6662       Fts5ExprTerm *pNext;
6663       Fts5ExprTerm *pTerm = &pPhrase->aTerm[i];
6664       sqlite3_free(pTerm->zTerm);
6665       sqlite3Fts5IterClose(pTerm->pIter);
6666       for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){
6667         pNext = pSyn->pSynonym;
6668         sqlite3Fts5IterClose(pSyn->pIter);
6669         fts5BufferFree((Fts5Buffer*)&pSyn[1]);
6670         sqlite3_free(pSyn);
6671       }
6672     }
6673     if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist);
6674     sqlite3_free(pPhrase);
6675   }
6676 }
6677 
6678 /*
6679 ** Set the "bFirst" flag on the first token of the phrase passed as the
6680 ** only argument.
6681 */
6682 static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){
6683   if( pPhrase && pPhrase->nTerm ){
6684     pPhrase->aTerm[0].bFirst = 1;
6685   }
6686 }
6687 
/*
** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated
** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is
** appended to it and the results returned.
**
** If pPhrase is NULL (and no error has occurred), pNear is returned
** unchanged.
**
** If the nearset already contains a phrase and either that last phrase or
** pPhrase has no terms, the empty one is freed and the other kept in its
** place. The pParse->apPhrase[] array and pParse->nPhrase are adjusted to
** match.
**
** If an OOM error occurs, or if pParse->rc is already set on entry, both
** the pNear and pPhrase objects are freed and NULL returned.
*/
static Fts5ExprNearset *sqlite3Fts5ParseNearset(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprNearset *pNear,         /* Existing nearset, or NULL */
  Fts5ExprPhrase *pPhrase         /* Recently parsed phrase */
){
  const int SZALLOC = 8;          /* apPhrase[] grows in steps of this size */
  Fts5ExprNearset *pRet = 0;

  if( pParse->rc==SQLITE_OK ){
    if( pPhrase==0 ){
      return pNear;
    }
    if( pNear==0 ){
      /* Allocate a new, zeroed nearset with room for SZALLOC phrases */
      sqlite3_int64 nByte;
      nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*);
      pRet = sqlite3_malloc64(nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }else{
        memset(pRet, 0, (size_t)nByte);
      }
    }else if( (pNear->nPhrase % SZALLOC)==0 ){
      /* The existing array is full. Grow it by another SZALLOC slots. */
      int nNew = pNear->nPhrase + SZALLOC;
      sqlite3_int64 nByte;

      nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*);
      pRet = (Fts5ExprNearset*)sqlite3_realloc64(pNear, nByte);
      if( pRet==0 ){
        pParse->rc = SQLITE_NOMEM;
      }
    }else{
      pRet = pNear;
    }
  }

  if( pRet==0 ){
    /* Error path. pNear is still valid here, even after a failed
    ** realloc, so it can be freed along with pPhrase.  */
    assert( pParse->rc!=SQLITE_OK );
    sqlite3Fts5ParseNearsetFree(pNear);
    sqlite3Fts5ParsePhraseFree(pPhrase);
  }else{
    if( pRet->nPhrase>0 ){
      Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1];
      assert( pParse!=0 );
      assert( pParse->apPhrase!=0 );
      assert( pParse->nPhrase>=2 );
      assert( pLast==pParse->apPhrase[pParse->nPhrase-2] );
      if( pPhrase->nTerm==0 ){
        /* New phrase is empty: discard it and re-store pLast below */
        fts5ExprPhraseFree(pPhrase);
        pRet->nPhrase--;
        pParse->nPhrase--;
        pPhrase = pLast;
      }else if( pLast->nTerm==0 ){
        /* Previous phrase is empty: discard it and let pPhrase take its
        ** slot in both arrays.  */
        fts5ExprPhraseFree(pLast);
        pParse->apPhrase[pParse->nPhrase-2] = pPhrase;
        pParse->nPhrase--;
        pRet->nPhrase--;
      }
    }
    pRet->apPhrase[pRet->nPhrase++] = pPhrase;
  }
  return pRet;
}
6758 
/*
** Context object passed to the fts5ParseTokenize() tokenizer callback.
*/
typedef struct TokenCtx TokenCtx;
struct TokenCtx {
  Fts5ExprPhrase *pPhrase;        /* Phrase under construction (may be NULL) */
  int rc;                         /* Error code set by fts5ParseTokenize() */
};
6764 
/*
** Callback for tokenizing terms used by ParseTerm().
**
** Each token is added to the phrase in the TokenCtx object passed as the
** first argument. A token flagged FTS5_TOKEN_COLOCATED is stored as a
** synonym of the most recently added term, provided the phrase already
** has at least one term. Any other token is appended as a new term,
** growing (or creating) the phrase object as required.
**
** Tokens longer than FTS5_MAX_TOKEN_SIZE bytes are truncated. Once an
** error has been recorded in TokenCtx.rc, further calls are no-ops that
** return that error code.
*/
static int fts5ParseTokenize(
  void *pContext,                 /* Pointer to TokenCtx object */
  int tflags,                     /* Mask of FTS5_TOKEN_* flags */
  const char *pToken,             /* Buffer containing token */
  int nToken,                     /* Size of token in bytes */
  int iUnused1,                   /* Start offset of token */
  int iUnused2                    /* End offset of token */
){
  int rc = SQLITE_OK;
  const int SZALLOC = 8;          /* aTerm[] grows in steps of this size */
  TokenCtx *pCtx = (TokenCtx*)pContext;
  Fts5ExprPhrase *pPhrase = pCtx->pPhrase;

  UNUSED_PARAM2(iUnused1, iUnused2);

  /* If an error has already occurred, this is a no-op */
  if( pCtx->rc!=SQLITE_OK ) return pCtx->rc;
  if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;

  if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){
    /* Synonym. A single allocation holds the Fts5ExprTerm, an Fts5Buffer
    ** and the nul-terminated token text, in that order.  */
    Fts5ExprTerm *pSyn;
    sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1;
    pSyn = (Fts5ExprTerm*)sqlite3_malloc64(nByte);
    if( pSyn==0 ){
      rc = SQLITE_NOMEM;
    }else{
      memset(pSyn, 0, (size_t)nByte);
      pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer);
      memcpy(pSyn->zTerm, pToken, nToken);
      /* Link the new entry in at the head of the last term's synonym list */
      pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym;
      pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn;
    }
  }else{
    Fts5ExprTerm *pTerm;
    if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
      /* No phrase yet, or the aTerm[] array is full. (Re)allocate the
      ** phrase with space for SZALLOC more terms.  */
      Fts5ExprPhrase *pNew;
      int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

      pNew = (Fts5ExprPhrase*)sqlite3_realloc64(pPhrase,
          sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew
      );
      if( pNew==0 ){
        rc = SQLITE_NOMEM;
      }else{
        /* A brand new phrase has its header zeroed. nTerm is then set to
        ** the number of terms in use: zero for a new phrase, unchanged
        ** otherwise.  */
        if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase));
        pCtx->pPhrase = pPhrase = pNew;
        pNew->nTerm = nNew - SZALLOC;
      }
    }

    if( rc==SQLITE_OK ){
      pTerm = &pPhrase->aTerm[pPhrase->nTerm++];
      memset(pTerm, 0, sizeof(Fts5ExprTerm));
      pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken);
    }
  }

  pCtx->rc = rc;
  return rc;
}
6828 
6829 
6830 /*
6831 ** Free the phrase object passed as the only argument.
6832 */
6833 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){
6834   fts5ExprPhraseFree(pPhrase);
6835 }
6836 
6837 /*
6838 ** Free the phrase object passed as the second argument.
6839 */
6840 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){
6841   if( pNear ){
6842     int i;
6843     for(i=0; i<pNear->nPhrase; i++){
6844       fts5ExprPhraseFree(pNear->apPhrase[i]);
6845     }
6846     sqlite3_free(pNear->pColset);
6847     sqlite3_free(pNear);
6848   }
6849 }
6850 
/*
** Record expression tree p as the result of the parse by storing it in
** pParse->pExpr. The assert() documents that no result may have been
** stored previously.
*/
static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){
  assert( pParse->pExpr==0 );
  pParse->pExpr = p;
}
6855 
6856 static int parseGrowPhraseArray(Fts5Parse *pParse){
6857   if( (pParse->nPhrase % 8)==0 ){
6858     sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8);
6859     Fts5ExprPhrase **apNew;
6860     apNew = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nByte);
6861     if( apNew==0 ){
6862       pParse->rc = SQLITE_NOMEM;
6863       return SQLITE_NOMEM;
6864     }
6865     pParse->apPhrase = apNew;
6866   }
6867   return SQLITE_OK;
6868 }
6869 
6870 /*
6871 ** This function is called by the parser to process a string token. The
6872 ** string may or may not be quoted. In any case it is tokenized and a
6873 ** phrase object consisting of all tokens returned.
6874 */
6875 static Fts5ExprPhrase *sqlite3Fts5ParseTerm(
6876   Fts5Parse *pParse,              /* Parse context */
6877   Fts5ExprPhrase *pAppend,        /* Phrase to append to */
6878   Fts5Token *pToken,              /* String to tokenize */
6879   int bPrefix                     /* True if there is a trailing "*" */
6880 ){
6881   Fts5Config *pConfig = pParse->pConfig;
6882   TokenCtx sCtx;                  /* Context object passed to callback */
6883   int rc;                         /* Tokenize return code */
6884   char *z = 0;
6885 
6886   memset(&sCtx, 0, sizeof(TokenCtx));
6887   sCtx.pPhrase = pAppend;
6888 
6889   rc = fts5ParseStringFromToken(pToken, &z);
6890   if( rc==SQLITE_OK ){
6891     int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0);
6892     int n;
6893     sqlite3Fts5Dequote(z);
6894     n = (int)strlen(z);
6895     rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize);
6896   }
6897   sqlite3_free(z);
6898   if( rc || (rc = sCtx.rc) ){
6899     pParse->rc = rc;
6900     fts5ExprPhraseFree(sCtx.pPhrase);
6901     sCtx.pPhrase = 0;
6902   }else{
6903 
6904     if( pAppend==0 ){
6905       if( parseGrowPhraseArray(pParse) ){
6906         fts5ExprPhraseFree(sCtx.pPhrase);
6907         return 0;
6908       }
6909       pParse->nPhrase++;
6910     }
6911 
6912     if( sCtx.pPhrase==0 ){
6913       /* This happens when parsing a token or quoted phrase that contains
6914       ** no token characters at all. (e.g ... MATCH '""'). */
6915       sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase));
6916     }else if( sCtx.pPhrase->nTerm ){
6917       sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix;
6918     }
6919     pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase;
6920   }
6921 
6922   return sCtx.pPhrase;
6923 }
6924 
6925 /*
6926 ** Create a new FTS5 expression by cloning phrase iPhrase of the
6927 ** expression passed as the second argument.
6928 */
6929 static int sqlite3Fts5ExprClonePhrase(
6930   Fts5Expr *pExpr,
6931   int iPhrase,
6932   Fts5Expr **ppNew
6933 ){
6934   int rc = SQLITE_OK;             /* Return code */
6935   Fts5ExprPhrase *pOrig;          /* The phrase extracted from pExpr */
6936   Fts5Expr *pNew = 0;             /* Expression to return via *ppNew */
6937   TokenCtx sCtx = {0,0};          /* Context object for fts5ParseTokenize */
6938 
6939   pOrig = pExpr->apExprPhrase[iPhrase];
6940   pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr));
6941   if( rc==SQLITE_OK ){
6942     pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc,
6943         sizeof(Fts5ExprPhrase*));
6944   }
6945   if( rc==SQLITE_OK ){
6946     pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc,
6947         sizeof(Fts5ExprNode));
6948   }
6949   if( rc==SQLITE_OK ){
6950     pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc,
6951         sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*));
6952   }
6953   if( rc==SQLITE_OK ){
6954     Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset;
6955     if( pColsetOrig ){
6956       sqlite3_int64 nByte;
6957       Fts5Colset *pColset;
6958       nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int);
6959       pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte);
6960       if( pColset ){
6961         memcpy(pColset, pColsetOrig, (size_t)nByte);
6962       }
6963       pNew->pRoot->pNear->pColset = pColset;
6964     }
6965   }
6966 
6967   if( pOrig->nTerm ){
6968     int i;                          /* Used to iterate through phrase terms */
6969     for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
6970       int tflags = 0;
6971       Fts5ExprTerm *p;
6972       for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){
6973         const char *zTerm = p->zTerm;
6974         rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm),
6975             0, 0);
6976         tflags = FTS5_TOKEN_COLOCATED;
6977       }
6978       if( rc==SQLITE_OK ){
6979         sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
6980         sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst;
6981       }
6982     }
6983   }else{
6984     /* This happens when parsing a token or quoted phrase that contains
6985     ** no token characters at all. (e.g ... MATCH '""'). */
6986     sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase));
6987   }
6988 
6989   if( rc==SQLITE_OK && ALWAYS(sCtx.pPhrase) ){
6990     /* All the allocations succeeded. Put the expression object together. */
6991     pNew->pIndex = pExpr->pIndex;
6992     pNew->pConfig = pExpr->pConfig;
6993     pNew->nPhrase = 1;
6994     pNew->apExprPhrase[0] = sCtx.pPhrase;
6995     pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase;
6996     pNew->pRoot->pNear->nPhrase = 1;
6997     sCtx.pPhrase->pNode = pNew->pRoot;
6998 
6999     if( pOrig->nTerm==1
7000      && pOrig->aTerm[0].pSynonym==0
7001      && pOrig->aTerm[0].bFirst==0
7002     ){
7003       pNew->pRoot->eType = FTS5_TERM;
7004       pNew->pRoot->xNext = fts5ExprNodeNext_TERM;
7005     }else{
7006       pNew->pRoot->eType = FTS5_STRING;
7007       pNew->pRoot->xNext = fts5ExprNodeNext_STRING;
7008     }
7009   }else{
7010     sqlite3Fts5ExprFree(pNew);
7011     fts5ExprPhraseFree(sCtx.pPhrase);
7012     pNew = 0;
7013   }
7014 
7015   *ppNew = pNew;
7016   return rc;
7017 }
7018 
7019 
7020 /*
7021 ** Token pTok has appeared in a MATCH expression where the NEAR operator
7022 ** is expected. If token pTok does not contain "NEAR", store an error
7023 ** in the pParse object.
7024 */
7025 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){
7026   if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){
7027     sqlite3Fts5ParseError(
7028         pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p
7029     );
7030   }
7031 }
7032 
7033 static void sqlite3Fts5ParseSetDistance(
7034   Fts5Parse *pParse,
7035   Fts5ExprNearset *pNear,
7036   Fts5Token *p
7037 ){
7038   if( pNear ){
7039     int nNear = 0;
7040     int i;
7041     if( p->n ){
7042       for(i=0; i<p->n; i++){
7043         char c = (char)p->p[i];
7044         if( c<'0' || c>'9' ){
7045           sqlite3Fts5ParseError(
7046               pParse, "expected integer, got \"%.*s\"", p->n, p->p
7047               );
7048           return;
7049         }
7050         nNear = nNear * 10 + (p->p[i] - '0');
7051       }
7052     }else{
7053       nNear = FTS5_DEFAULT_NEARDIST;
7054     }
7055     pNear->nNear = nNear;
7056   }
7057 }
7058 
7059 /*
7060 ** The second argument passed to this function may be NULL, or it may be
7061 ** an existing Fts5Colset object. This function returns a pointer to
7062 ** a new colset object containing the contents of (p) with new value column
7063 ** number iCol appended.
7064 **
7065 ** If an OOM error occurs, store an error code in pParse and return NULL.
7066 ** The old colset object (if any) is not freed in this case.
7067 */
7068 static Fts5Colset *fts5ParseColset(
7069   Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
7070   Fts5Colset *p,                  /* Existing colset object */
7071   int iCol                        /* New column to add to colset object */
7072 ){
7073   int nCol = p ? p->nCol : 0;     /* Num. columns already in colset object */
7074   Fts5Colset *pNew;               /* New colset object to return */
7075 
7076   assert( pParse->rc==SQLITE_OK );
7077   assert( iCol>=0 && iCol<pParse->pConfig->nCol );
7078 
7079   pNew = sqlite3_realloc64(p, sizeof(Fts5Colset) + sizeof(int)*nCol);
7080   if( pNew==0 ){
7081     pParse->rc = SQLITE_NOMEM;
7082   }else{
7083     int *aiCol = pNew->aiCol;
7084     int i, j;
7085     for(i=0; i<nCol; i++){
7086       if( aiCol[i]==iCol ) return pNew;
7087       if( aiCol[i]>iCol ) break;
7088     }
7089     for(j=nCol; j>i; j--){
7090       aiCol[j] = aiCol[j-1];
7091     }
7092     aiCol[i] = iCol;
7093     pNew->nCol = nCol+1;
7094 
7095 #ifndef NDEBUG
7096     /* Check that the array is in order and contains no duplicate entries. */
7097     for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] );
7098 #endif
7099   }
7100 
7101   return pNew;
7102 }
7103 
7104 /*
7105 ** Allocate and return an Fts5Colset object specifying the inverse of
7106 ** the colset passed as the second argument. Free the colset passed
7107 ** as the second argument before returning.
7108 */
7109 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){
7110   Fts5Colset *pRet;
7111   int nCol = pParse->pConfig->nCol;
7112 
7113   pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc,
7114       sizeof(Fts5Colset) + sizeof(int)*nCol
7115   );
7116   if( pRet ){
7117     int i;
7118     int iOld = 0;
7119     for(i=0; i<nCol; i++){
7120       if( iOld>=p->nCol || p->aiCol[iOld]!=i ){
7121         pRet->aiCol[pRet->nCol++] = i;
7122       }else{
7123         iOld++;
7124       }
7125     }
7126   }
7127 
7128   sqlite3_free(p);
7129   return pRet;
7130 }
7131 
7132 static Fts5Colset *sqlite3Fts5ParseColset(
7133   Fts5Parse *pParse,              /* Store SQLITE_NOMEM here if required */
7134   Fts5Colset *pColset,            /* Existing colset object */
7135   Fts5Token *p
7136 ){
7137   Fts5Colset *pRet = 0;
7138   int iCol;
7139   char *z;                        /* Dequoted copy of token p */
7140 
7141   z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n);
7142   if( pParse->rc==SQLITE_OK ){
7143     Fts5Config *pConfig = pParse->pConfig;
7144     sqlite3Fts5Dequote(z);
7145     for(iCol=0; iCol<pConfig->nCol; iCol++){
7146       if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break;
7147     }
7148     if( iCol==pConfig->nCol ){
7149       sqlite3Fts5ParseError(pParse, "no such column: %s", z);
7150     }else{
7151       pRet = fts5ParseColset(pParse, pColset, iCol);
7152     }
7153     sqlite3_free(z);
7154   }
7155 
7156   if( pRet==0 ){
7157     assert( pParse->rc!=SQLITE_OK );
7158     sqlite3_free(pColset);
7159   }
7160 
7161   return pRet;
7162 }
7163 
7164 /*
7165 ** If argument pOrig is NULL, or if (*pRc) is set to anything other than
7166 ** SQLITE_OK when this function is called, NULL is returned.
7167 **
7168 ** Otherwise, a copy of (*pOrig) is made into memory obtained from
7169 ** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation
7170 ** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned.
7171 */
7172 static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){
7173   Fts5Colset *pRet;
7174   if( pOrig ){
7175     sqlite3_int64 nByte = sizeof(Fts5Colset) + (pOrig->nCol-1) * sizeof(int);
7176     pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte);
7177     if( pRet ){
7178       memcpy(pRet, pOrig, (size_t)nByte);
7179     }
7180   }else{
7181     pRet = 0;
7182   }
7183   return pRet;
7184 }
7185 
7186 /*
7187 ** Remove from colset pColset any columns that are not also in colset pMerge.
7188 */
7189 static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){
7190   int iIn = 0;          /* Next input in pColset */
7191   int iMerge = 0;       /* Next input in pMerge */
7192   int iOut = 0;         /* Next output slot in pColset */
7193 
7194   while( iIn<pColset->nCol && iMerge<pMerge->nCol ){
7195     int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge];
7196     if( iDiff==0 ){
7197       pColset->aiCol[iOut++] = pMerge->aiCol[iMerge];
7198       iMerge++;
7199       iIn++;
7200     }else if( iDiff>0 ){
7201       iMerge++;
7202     }else{
7203       iIn++;
7204     }
7205   }
7206   pColset->nCol = iOut;
7207 }
7208 
7209 /*
7210 ** Recursively apply colset pColset to expression node pNode and all of
7211 ** its decendents. If (*ppFree) is not NULL, it contains a spare copy
7212 ** of pColset. This function may use the spare copy and set (*ppFree) to
7213 ** zero, or it may create copies of pColset using fts5CloneColset().
7214 */
7215 static void fts5ParseSetColset(
7216   Fts5Parse *pParse,
7217   Fts5ExprNode *pNode,
7218   Fts5Colset *pColset,
7219   Fts5Colset **ppFree
7220 ){
7221   if( pParse->rc==SQLITE_OK ){
7222     assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING
7223          || pNode->eType==FTS5_AND  || pNode->eType==FTS5_OR
7224          || pNode->eType==FTS5_NOT  || pNode->eType==FTS5_EOF
7225     );
7226     if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){
7227       Fts5ExprNearset *pNear = pNode->pNear;
7228       if( pNear->pColset ){
7229         fts5MergeColset(pNear->pColset, pColset);
7230         if( pNear->pColset->nCol==0 ){
7231           pNode->eType = FTS5_EOF;
7232           pNode->xNext = 0;
7233         }
7234       }else if( *ppFree ){
7235         pNear->pColset = pColset;
7236         *ppFree = 0;
7237       }else{
7238         pNear->pColset = fts5CloneColset(&pParse->rc, pColset);
7239       }
7240     }else{
7241       int i;
7242       assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 );
7243       for(i=0; i<pNode->nChild; i++){
7244         fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree);
7245       }
7246     }
7247   }
7248 }
7249 
7250 /*
7251 ** Apply colset pColset to expression node pExpr and all of its descendents.
7252 */
7253 static void sqlite3Fts5ParseSetColset(
7254   Fts5Parse *pParse,
7255   Fts5ExprNode *pExpr,
7256   Fts5Colset *pColset
7257 ){
7258   Fts5Colset *pFree = pColset;
7259   if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){
7260     sqlite3Fts5ParseError(pParse,
7261         "fts5: column queries are not supported (detail=none)"
7262     );
7263   }else{
7264     fts5ParseSetColset(pParse, pExpr, pColset, &pFree);
7265   }
7266   sqlite3_free(pFree);
7267 }
7268 
7269 static void fts5ExprAssignXNext(Fts5ExprNode *pNode){
7270   switch( pNode->eType ){
7271     case FTS5_STRING: {
7272       Fts5ExprNearset *pNear = pNode->pNear;
7273       if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1
7274        && pNear->apPhrase[0]->aTerm[0].pSynonym==0
7275        && pNear->apPhrase[0]->aTerm[0].bFirst==0
7276       ){
7277         pNode->eType = FTS5_TERM;
7278         pNode->xNext = fts5ExprNodeNext_TERM;
7279       }else{
7280         pNode->xNext = fts5ExprNodeNext_STRING;
7281       }
7282       break;
7283     };
7284 
7285     case FTS5_OR: {
7286       pNode->xNext = fts5ExprNodeNext_OR;
7287       break;
7288     };
7289 
7290     case FTS5_AND: {
7291       pNode->xNext = fts5ExprNodeNext_AND;
7292       break;
7293     };
7294 
7295     default: assert( pNode->eType==FTS5_NOT ); {
7296       pNode->xNext = fts5ExprNodeNext_NOT;
7297       break;
7298     };
7299   }
7300 }
7301 
7302 static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){
7303   if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){
7304     int nByte = sizeof(Fts5ExprNode*) * pSub->nChild;
7305     memcpy(&p->apChild[p->nChild], pSub->apChild, nByte);
7306     p->nChild += pSub->nChild;
7307     sqlite3_free(pSub);
7308   }else{
7309     p->apChild[p->nChild++] = pSub;
7310   }
7311 }
7312 
7313 /*
7314 ** This function is used when parsing LIKE or GLOB patterns against
7315 ** trigram indexes that specify either detail=column or detail=none.
7316 ** It converts a phrase:
7317 **
7318 **     abc + def + ghi
7319 **
7320 ** into an AND tree:
7321 **
7322 **     abc AND def AND ghi
7323 */
7324 static Fts5ExprNode *fts5ParsePhraseToAnd(
7325   Fts5Parse *pParse,
7326   Fts5ExprNearset *pNear
7327 ){
7328   int nTerm = pNear->apPhrase[0]->nTerm;
7329   int ii;
7330   int nByte;
7331   Fts5ExprNode *pRet;
7332 
7333   assert( pNear->nPhrase==1 );
7334   assert( pParse->bPhraseToAnd );
7335 
7336   nByte = sizeof(Fts5ExprNode) + nTerm*sizeof(Fts5ExprNode*);
7337   pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
7338   if( pRet ){
7339     pRet->eType = FTS5_AND;
7340     pRet->nChild = nTerm;
7341     fts5ExprAssignXNext(pRet);
7342     pParse->nPhrase--;
7343     for(ii=0; ii<nTerm; ii++){
7344       Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero(
7345           &pParse->rc, sizeof(Fts5ExprPhrase)
7346       );
7347       if( pPhrase ){
7348         if( parseGrowPhraseArray(pParse) ){
7349           fts5ExprPhraseFree(pPhrase);
7350         }else{
7351           pParse->apPhrase[pParse->nPhrase++] = pPhrase;
7352           pPhrase->nTerm = 1;
7353           pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup(
7354               &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1
7355           );
7356           pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING,
7357               0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase)
7358           );
7359         }
7360       }
7361     }
7362 
7363     if( pParse->rc ){
7364       sqlite3Fts5ParseNodeFree(pRet);
7365       pRet = 0;
7366     }else{
7367       sqlite3Fts5ParseNearsetFree(pNear);
7368     }
7369   }
7370 
7371   return pRet;
7372 }
7373 
7374 /*
7375 ** Allocate and return a new expression object. If anything goes wrong (i.e.
7376 ** OOM error), leave an error code in pParse and return NULL.
7377 */
7378 static Fts5ExprNode *sqlite3Fts5ParseNode(
7379   Fts5Parse *pParse,              /* Parse context */
7380   int eType,                      /* FTS5_STRING, AND, OR or NOT */
7381   Fts5ExprNode *pLeft,            /* Left hand child expression */
7382   Fts5ExprNode *pRight,           /* Right hand child expression */
7383   Fts5ExprNearset *pNear          /* For STRING expressions, the near cluster */
7384 ){
7385   Fts5ExprNode *pRet = 0;
7386 
7387   if( pParse->rc==SQLITE_OK ){
7388     int nChild = 0;               /* Number of children of returned node */
7389     sqlite3_int64 nByte;          /* Bytes of space to allocate for this node */
7390 
7391     assert( (eType!=FTS5_STRING && !pNear)
7392          || (eType==FTS5_STRING && !pLeft && !pRight)
7393     );
7394     if( eType==FTS5_STRING && pNear==0 ) return 0;
7395     if( eType!=FTS5_STRING && pLeft==0 ) return pRight;
7396     if( eType!=FTS5_STRING && pRight==0 ) return pLeft;
7397 
7398     if( eType==FTS5_STRING
7399      && pParse->bPhraseToAnd
7400      && pNear->apPhrase[0]->nTerm>1
7401     ){
7402       pRet = fts5ParsePhraseToAnd(pParse, pNear);
7403     }else{
7404       if( eType==FTS5_NOT ){
7405         nChild = 2;
7406       }else if( eType==FTS5_AND || eType==FTS5_OR ){
7407         nChild = 2;
7408         if( pLeft->eType==eType ) nChild += pLeft->nChild-1;
7409         if( pRight->eType==eType ) nChild += pRight->nChild-1;
7410       }
7411 
7412       nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1);
7413       pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte);
7414 
7415       if( pRet ){
7416         pRet->eType = eType;
7417         pRet->pNear = pNear;
7418         fts5ExprAssignXNext(pRet);
7419         if( eType==FTS5_STRING ){
7420           int iPhrase;
7421           for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){
7422             pNear->apPhrase[iPhrase]->pNode = pRet;
7423             if( pNear->apPhrase[iPhrase]->nTerm==0 ){
7424               pRet->xNext = 0;
7425               pRet->eType = FTS5_EOF;
7426             }
7427           }
7428 
7429           if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){
7430             Fts5ExprPhrase *pPhrase = pNear->apPhrase[0];
7431             if( pNear->nPhrase!=1
7432                 || pPhrase->nTerm>1
7433                 || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst)
7434               ){
7435               sqlite3Fts5ParseError(pParse,
7436                   "fts5: %s queries are not supported (detail!=full)",
7437                   pNear->nPhrase==1 ? "phrase": "NEAR"
7438               );
7439               sqlite3_free(pRet);
7440               pRet = 0;
7441             }
7442           }
7443         }else{
7444           fts5ExprAddChildren(pRet, pLeft);
7445           fts5ExprAddChildren(pRet, pRight);
7446         }
7447       }
7448     }
7449   }
7450 
7451   if( pRet==0 ){
7452     assert( pParse->rc!=SQLITE_OK );
7453     sqlite3Fts5ParseNodeFree(pLeft);
7454     sqlite3Fts5ParseNodeFree(pRight);
7455     sqlite3Fts5ParseNearsetFree(pNear);
7456   }
7457   return pRet;
7458 }
7459 
/*
** Combine expressions pLeft and pRight with an implicit AND.
**
** If pParse already holds an error, both expressions are freed and NULL
** is returned. Otherwise there are three cases, where pPrev is pLeft
** itself or, if pLeft is an AND node, its last child:
**
**   * pRight is an EOF node: pRight is discarded, its phrase is dropped
**     from the end of pParse->apPhrase[] and pLeft is returned.
**
**   * pPrev is an EOF node: pPrev is replaced by pRight (either as the
**     whole result or as the last child of pLeft), and pPrev's phrase is
**     removed from pParse->apPhrase[] by shifting pRight's phrases down
**     one slot.
**
**   * Otherwise a new FTS5_AND node is built by sqlite3Fts5ParseNode().
*/
static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd(
  Fts5Parse *pParse,              /* Parse context */
  Fts5ExprNode *pLeft,            /* Left hand child expression */
  Fts5ExprNode *pRight            /* Right hand child expression */
){
  Fts5ExprNode *pRet = 0;
  Fts5ExprNode *pPrev;

  if( pParse->rc ){
    sqlite3Fts5ParseNodeFree(pLeft);
    sqlite3Fts5ParseNodeFree(pRight);
  }else{

    assert( pLeft->eType==FTS5_STRING
        || pLeft->eType==FTS5_TERM
        || pLeft->eType==FTS5_EOF
        || pLeft->eType==FTS5_AND
    );
    assert( pRight->eType==FTS5_STRING
        || pRight->eType==FTS5_TERM
        || pRight->eType==FTS5_EOF
    );

    /* pPrev is the expression immediately to the left of pRight */
    if( pLeft->eType==FTS5_AND ){
      pPrev = pLeft->apChild[pLeft->nChild-1];
    }else{
      pPrev = pLeft;
    }
    assert( pPrev->eType==FTS5_STRING
        || pPrev->eType==FTS5_TERM
        || pPrev->eType==FTS5_EOF
        );

    if( pRight->eType==FTS5_EOF ){
      /* pRight's phrase is the most recently added one: drop it */
      assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] );
      sqlite3Fts5ParseNodeFree(pRight);
      pRet = pLeft;
      pParse->nPhrase--;
    }
    else if( pPrev->eType==FTS5_EOF ){
      Fts5ExprPhrase **ap;

      if( pPrev==pLeft ){
        pRet = pRight;
      }else{
        pLeft->apChild[pLeft->nChild-1] = pRight;
        pRet = pLeft;
      }

      /* ap[0] is pPrev's phrase; it is followed by the phrases of pRight.
      ** Slide those down over it, then release pPrev. */
      ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase];
      assert( ap[0]==pPrev->pNear->apPhrase[0] );
      memmove(ap, &ap[1], sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase);
      pParse->nPhrase--;

      sqlite3Fts5ParseNodeFree(pPrev);
    }
    else{
      pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0);
    }
  }

  return pRet;
}
7523 
7524 #ifdef SQLITE_TEST
7525 static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){
7526   sqlite3_int64 nByte = 0;
7527   Fts5ExprTerm *p;
7528   char *zQuoted;
7529 
7530   /* Determine the maximum amount of space required. */
7531   for(p=pTerm; p; p=p->pSynonym){
7532     nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2;
7533   }
7534   zQuoted = sqlite3_malloc64(nByte);
7535 
7536   if( zQuoted ){
7537     int i = 0;
7538     for(p=pTerm; p; p=p->pSynonym){
7539       char *zIn = p->zTerm;
7540       zQuoted[i++] = '"';
7541       while( *zIn ){
7542         if( *zIn=='"' ) zQuoted[i++] = '"';
7543         zQuoted[i++] = *zIn++;
7544       }
7545       zQuoted[i++] = '"';
7546       if( p->pSynonym ) zQuoted[i++] = '|';
7547     }
7548     if( pTerm->bPrefix ){
7549       zQuoted[i++] = ' ';
7550       zQuoted[i++] = '*';
7551     }
7552     zQuoted[i++] = '\0';
7553   }
7554   return zQuoted;
7555 }
7556 
/*
** Format the printf-style arguments and append the result to string zApp,
** returning the combined text in a newly allocated buffer. zApp may be
** NULL, in which case only the formatted text is returned.
**
** zApp is always freed. NULL is returned if any allocation fails.
*/
static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){
  char *zTail;                    /* Newly formatted text */
  char *zResult;                  /* Value to return */
  va_list ap;

  va_start(ap, zFmt);
  zTail = sqlite3_vmprintf(zFmt, ap);
  va_end(ap);

  zResult = zTail;
  if( zApp!=0 && zTail!=0 ){
    zResult = sqlite3_mprintf("%s%s", zApp, zTail);
    sqlite3_free(zTail);
  }

  sqlite3_free(zApp);
  return zResult;
}
7571 
7572 /*
7573 ** Compose a tcl-readable representation of expression pExpr. Return a
7574 ** pointer to a buffer containing that representation. It is the
7575 ** responsibility of the caller to at some point free the buffer using
7576 ** sqlite3_free().
7577 */
7578 static char *fts5ExprPrintTcl(
7579   Fts5Config *pConfig,
7580   const char *zNearsetCmd,
7581   Fts5ExprNode *pExpr
7582 ){
7583   char *zRet = 0;
7584   if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
7585     Fts5ExprNearset *pNear = pExpr->pNear;
7586     int i;
7587     int iTerm;
7588 
7589     zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd);
7590     if( zRet==0 ) return 0;
7591     if( pNear->pColset ){
7592       int *aiCol = pNear->pColset->aiCol;
7593       int nCol = pNear->pColset->nCol;
7594       if( nCol==1 ){
7595         zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]);
7596       }else{
7597         zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]);
7598         for(i=1; i<pNear->pColset->nCol; i++){
7599           zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]);
7600         }
7601         zRet = fts5PrintfAppend(zRet, "} ");
7602       }
7603       if( zRet==0 ) return 0;
7604     }
7605 
7606     if( pNear->nPhrase>1 ){
7607       zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear);
7608       if( zRet==0 ) return 0;
7609     }
7610 
7611     zRet = fts5PrintfAppend(zRet, "--");
7612     if( zRet==0 ) return 0;
7613 
7614     for(i=0; i<pNear->nPhrase; i++){
7615       Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
7616 
7617       zRet = fts5PrintfAppend(zRet, " {");
7618       for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){
7619         char *zTerm = pPhrase->aTerm[iTerm].zTerm;
7620         zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm);
7621         if( pPhrase->aTerm[iTerm].bPrefix ){
7622           zRet = fts5PrintfAppend(zRet, "*");
7623         }
7624       }
7625 
7626       if( zRet ) zRet = fts5PrintfAppend(zRet, "}");
7627       if( zRet==0 ) return 0;
7628     }
7629 
7630   }else{
7631     char const *zOp = 0;
7632     int i;
7633     switch( pExpr->eType ){
7634       case FTS5_AND: zOp = "AND"; break;
7635       case FTS5_NOT: zOp = "NOT"; break;
7636       default:
7637         assert( pExpr->eType==FTS5_OR );
7638         zOp = "OR";
7639         break;
7640     }
7641 
7642     zRet = sqlite3_mprintf("%s", zOp);
7643     for(i=0; zRet && i<pExpr->nChild; i++){
7644       char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]);
7645       if( !z ){
7646         sqlite3_free(zRet);
7647         zRet = 0;
7648       }else{
7649         zRet = fts5PrintfAppend(zRet, " [%z]", z);
7650       }
7651     }
7652   }
7653 
7654   return zRet;
7655 }
7656 
/*
** Compose a text representation of expression pExpr in FTS5 query syntax
** and return it in a buffer that the caller must eventually free using
** sqlite3_free(). NULL is returned if an allocation fails.
**
** A node with eType==0 is rendered as an empty quoted string. A STRING or
** TERM node is rendered as an optional column filter ("col : " or
** "{c1 c2} : "), followed by its phrases, with the terms of a phrase
** joined by " + " and multiple phrases wrapped in "NEAR(... , N)". Any
** other node is rendered as its children joined by " AND ", " NOT " or
** " OR ", with parentheses around each child that is itself an operator
** node.
*/
static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){
  char *zRet = 0;
  if( pExpr->eType==0 ){
    /* NOTE(review): eType 0 is presumably FTS5_EOF - confirm */
    return sqlite3_mprintf("\"\"");
  }else
  if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){
    Fts5ExprNearset *pNear = pExpr->pNear;
    int i;
    int iTerm;

    if( pNear->pColset ){
      /* Column filter: names are looked up in pConfig->azCol[] */
      int ii;
      Fts5Colset *pColset = pNear->pColset;
      if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{");
      for(ii=0; ii<pColset->nCol; ii++){
        zRet = fts5PrintfAppend(zRet, "%s%s",
            pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " "
        );
      }
      if( zRet ){
        zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : "");
      }
      if( zRet==0 ) return 0;
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, "NEAR(");
      if( zRet==0 ) return 0;
    }

    for(i=0; i<pNear->nPhrase; i++){
      Fts5ExprPhrase *pPhrase = pNear->apPhrase[i];
      if( i!=0 ){
        zRet = fts5PrintfAppend(zRet, " ");
        if( zRet==0 ) return 0;
      }
      for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){
        /* zTerm is a temporary quoted rendering, freed once appended */
        char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]);
        if( zTerm ){
          zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm);
          sqlite3_free(zTerm);
        }
        if( zTerm==0 || zRet==0 ){
          sqlite3_free(zRet);
          return 0;
        }
      }
    }

    if( pNear->nPhrase>1 ){
      zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear);
      if( zRet==0 ) return 0;
    }

  }else{
    char const *zOp = 0;
    int i;

    switch( pExpr->eType ){
      case FTS5_AND: zOp = " AND "; break;
      case FTS5_NOT: zOp = " NOT "; break;
      default:
        assert( pExpr->eType==FTS5_OR );
        zOp = " OR ";
        break;
    }

    for(i=0; i<pExpr->nChild; i++){
      char *z = fts5ExprPrint(pConfig, pExpr->apChild[i]);
      if( z==0 ){
        sqlite3_free(zRet);
        zRet = 0;
      }else{
        /* b is true if the child is an operator node and so must be
        ** wrapped in parentheses. The "%z" conversion is passed the
        ** allocated child string z, which is not freed separately here. */
        int e = pExpr->apChild[i]->eType;
        int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF);
        zRet = fts5PrintfAppend(zRet, "%s%s%z%s",
            (i==0 ? "" : zOp),
            (b?"(":""), z, (b?")":"")
        );
      }
      if( zRet==0 ) break;
    }
  }

  return zRet;
}
7743 
7744 /*
7745 ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0)
7746 ** and fts5_expr_tcl() (bTcl!=0).
7747 */
7748 static void fts5ExprFunction(
7749   sqlite3_context *pCtx,          /* Function call context */
7750   int nArg,                       /* Number of args */
7751   sqlite3_value **apVal,          /* Function arguments */
7752   int bTcl
7753 ){
7754   Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
7755   sqlite3 *db = sqlite3_context_db_handle(pCtx);
7756   const char *zExpr = 0;
7757   char *zErr = 0;
7758   Fts5Expr *pExpr = 0;
7759   int rc;
7760   int i;
7761 
7762   const char **azConfig;          /* Array of arguments for Fts5Config */
7763   const char *zNearsetCmd = "nearset";
7764   int nConfig;                    /* Size of azConfig[] */
7765   Fts5Config *pConfig = 0;
7766   int iArg = 1;
7767 
7768   if( nArg<1 ){
7769     zErr = sqlite3_mprintf("wrong number of arguments to function %s",
7770         bTcl ? "fts5_expr_tcl" : "fts5_expr"
7771     );
7772     sqlite3_result_error(pCtx, zErr, -1);
7773     sqlite3_free(zErr);
7774     return;
7775   }
7776 
7777   if( bTcl && nArg>1 ){
7778     zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]);
7779     iArg = 2;
7780   }
7781 
7782   nConfig = 3 + (nArg-iArg);
7783   azConfig = (const char**)sqlite3_malloc64(sizeof(char*) * nConfig);
7784   if( azConfig==0 ){
7785     sqlite3_result_error_nomem(pCtx);
7786     return;
7787   }
7788   azConfig[0] = 0;
7789   azConfig[1] = "main";
7790   azConfig[2] = "tbl";
7791   for(i=3; iArg<nArg; iArg++){
7792     const char *z = (const char*)sqlite3_value_text(apVal[iArg]);
7793     azConfig[i++] = (z ? z : "");
7794   }
7795 
7796   zExpr = (const char*)sqlite3_value_text(apVal[0]);
7797   if( zExpr==0 ) zExpr = "";
7798 
7799   rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr);
7800   if( rc==SQLITE_OK ){
7801     rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr);
7802   }
7803   if( rc==SQLITE_OK ){
7804     char *zText;
7805     if( pExpr->pRoot->xNext==0 ){
7806       zText = sqlite3_mprintf("");
7807     }else if( bTcl ){
7808       zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot);
7809     }else{
7810       zText = fts5ExprPrint(pConfig, pExpr->pRoot);
7811     }
7812     if( zText==0 ){
7813       rc = SQLITE_NOMEM;
7814     }else{
7815       sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT);
7816       sqlite3_free(zText);
7817     }
7818   }
7819 
7820   if( rc!=SQLITE_OK ){
7821     if( zErr ){
7822       sqlite3_result_error(pCtx, zErr, -1);
7823       sqlite3_free(zErr);
7824     }else{
7825       sqlite3_result_error_code(pCtx, rc);
7826     }
7827   }
7828   sqlite3_free((void *)azConfig);
7829   sqlite3Fts5ConfigFree(pConfig);
7830   sqlite3Fts5ExprFree(pExpr);
7831 }
7832 
/*
** SQL function entry point that forwards to fts5ExprFunction() with
** bTcl==0.
*/
static void fts5ExprFunctionHr(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  fts5ExprFunction(pCtx, nArg, apVal, 0);
}
/*
** SQL function entry point that forwards to fts5ExprFunction() with
** bTcl==1.
*/
static void fts5ExprFunctionTcl(
  sqlite3_context *pCtx,          /* Function call context */
  int nArg,                       /* Number of args */
  sqlite3_value **apVal           /* Function arguments */
){
  fts5ExprFunction(pCtx, nArg, apVal, 1);
}
7847 
7848 /*
7849 ** The implementation of an SQLite user-defined-function that accepts a
7850 ** single integer as an argument. If the integer is an alpha-numeric
7851 ** unicode code point, 1 is returned. Otherwise 0.
7852 */
7853 static void fts5ExprIsAlnum(
7854   sqlite3_context *pCtx,          /* Function call context */
7855   int nArg,                       /* Number of args */
7856   sqlite3_value **apVal           /* Function arguments */
7857 ){
7858   int iCode;
7859   u8 aArr[32];
7860   if( nArg!=1 ){
7861     sqlite3_result_error(pCtx,
7862         "wrong number of arguments to function fts5_isalnum", -1
7863     );
7864     return;
7865   }
7866   memset(aArr, 0, sizeof(aArr));
7867   sqlite3Fts5UnicodeCatParse("L*", aArr);
7868   sqlite3Fts5UnicodeCatParse("N*", aArr);
7869   sqlite3Fts5UnicodeCatParse("Co", aArr);
7870   iCode = sqlite3_value_int(apVal[0]);
7871   sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]);
7872 }
7873 
7874 static void fts5ExprFold(
7875   sqlite3_context *pCtx,          /* Function call context */
7876   int nArg,                       /* Number of args */
7877   sqlite3_value **apVal           /* Function arguments */
7878 ){
7879   if( nArg!=1 && nArg!=2 ){
7880     sqlite3_result_error(pCtx,
7881         "wrong number of arguments to function fts5_fold", -1
7882     );
7883   }else{
7884     int iCode;
7885     int bRemoveDiacritics = 0;
7886     iCode = sqlite3_value_int(apVal[0]);
7887     if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]);
7888     sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics));
7889   }
7890 }
7891 #endif /* ifdef SQLITE_TEST */
7892 
7893 /*
7894 ** This is called during initialization to register the fts5_expr() scalar
7895 ** UDF with the SQLite handle passed as the only argument.
7896 */
7897 static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){
7898 #ifdef SQLITE_TEST
7899   struct Fts5ExprFunc {
7900     const char *z;
7901     void (*x)(sqlite3_context*,int,sqlite3_value**);
7902   } aFunc[] = {
7903     { "fts5_expr",     fts5ExprFunctionHr },
7904     { "fts5_expr_tcl", fts5ExprFunctionTcl },
7905     { "fts5_isalnum",  fts5ExprIsAlnum },
7906     { "fts5_fold",     fts5ExprFold },
7907   };
7908   int i;
7909   int rc = SQLITE_OK;
7910   void *pCtx = (void*)pGlobal;
7911 
7912   for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){
7913     struct Fts5ExprFunc *p = &aFunc[i];
7914     rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0);
7915   }
7916 #else
7917   int rc = SQLITE_OK;
7918   UNUSED_PARAM2(pGlobal,db);
7919 #endif
7920 
7921   /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and
7922   ** sqlite3Fts5ParserFallback() are unused */
7923 #ifndef NDEBUG
7924   (void)sqlite3Fts5ParserTrace;
7925 #endif
7926   (void)sqlite3Fts5ParserFallback;
7927 
7928   return rc;
7929 }
7930 
7931 /*
7932 ** Return the number of phrases in expression pExpr.
7933 */
7934 static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){
7935   return (pExpr ? pExpr->nPhrase : 0);
7936 }
7937 
7938 /*
7939 ** Return the number of terms in the iPhrase'th phrase in pExpr.
7940 */
7941 static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){
7942   if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0;
7943   return pExpr->apExprPhrase[iPhrase]->nTerm;
7944 }
7945 
7946 /*
7947 ** This function is used to access the current position list for phrase
7948 ** iPhrase.
7949 */
7950 static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){
7951   int nRet;
7952   Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
7953   Fts5ExprNode *pNode = pPhrase->pNode;
7954   if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){
7955     *pa = pPhrase->poslist.p;
7956     nRet = pPhrase->poslist.n;
7957   }else{
7958     *pa = 0;
7959     nRet = 0;
7960   }
7961   return nRet;
7962 }
7963 
/*
** Per-phrase state used while position lists are rebuilt by tokenizing
** column text. sqlite3Fts5ExprClearPoslists() allocates an array of these,
** one per phrase in the expression.
*/
struct Fts5PoslistPopulator {
  Fts5PoslistWriter writer;       /* Passed to sqlite3Fts5PoslistWriterAppend() */
  int bOk;                        /* True if ok to populate */
  int bMiss;                      /* Set by ClearPoslists(); forces bOk to 0 */
};
7969 
7970 /*
7971 ** Clear the position lists associated with all phrases in the expression
7972 ** passed as the first argument. Argument bLive is true if the expression
7973 ** might be pointing to a real entry, otherwise it has just been reset.
7974 **
7975 ** At present this function is only used for detail=col and detail=none
7976 ** fts5 tables. This implies that all phrases must be at most 1 token
7977 ** in size, as phrase matches are not supported without detail=full.
7978 */
7979 static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){
7980   Fts5PoslistPopulator *pRet;
7981   pRet = sqlite3_malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
7982   if( pRet ){
7983     int i;
7984     memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase);
7985     for(i=0; i<pExpr->nPhrase; i++){
7986       Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist;
7987       Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
7988       assert( pExpr->apExprPhrase[i]->nTerm<=1 );
7989       if( bLive &&
7990           (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof)
7991       ){
7992         pRet[i].bMiss = 1;
7993       }else{
7994         pBuf->n = 0;
7995       }
7996     }
7997   }
7998   return pRet;
7999 }
8000 
/*
** Context object passed through the tokenizer to
** fts5ExprPopulatePoslistsCb() by sqlite3Fts5ExprPopulatePoslists().
*/
struct Fts5ExprCtx {
  Fts5Expr *pExpr;                /* Expression whose poslists are populated */
  Fts5PoslistPopulator *aPopulator; /* One entry per phrase in pExpr */
  i64 iOff;                       /* Offset of the most recent token */
};
typedef struct Fts5ExprCtx Fts5ExprCtx;
8007 
8008 /*
8009 ** TODO: Make this more efficient!
8010 */
8011 static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){
8012   int i;
8013   for(i=0; i<pColset->nCol; i++){
8014     if( pColset->aiCol[i]==iCol ) return 1;
8015   }
8016   return 0;
8017 }
8018 
8019 static int fts5ExprPopulatePoslistsCb(
8020   void *pCtx,                /* Copy of 2nd argument to xTokenize() */
8021   int tflags,                /* Mask of FTS5_TOKEN_* flags */
8022   const char *pToken,        /* Pointer to buffer containing token */
8023   int nToken,                /* Size of token in bytes */
8024   int iUnused1,              /* Byte offset of token within input text */
8025   int iUnused2               /* Byte offset of end of token within input text */
8026 ){
8027   Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx;
8028   Fts5Expr *pExpr = p->pExpr;
8029   int i;
8030 
8031   UNUSED_PARAM2(iUnused1, iUnused2);
8032 
8033   if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
8034   if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++;
8035   for(i=0; i<pExpr->nPhrase; i++){
8036     Fts5ExprTerm *pTerm;
8037     if( p->aPopulator[i].bOk==0 ) continue;
8038     for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){
8039       int nTerm = (int)strlen(pTerm->zTerm);
8040       if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix))
8041        && memcmp(pTerm->zTerm, pToken, nTerm)==0
8042       ){
8043         int rc = sqlite3Fts5PoslistWriterAppend(
8044             &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff
8045         );
8046         if( rc ) return rc;
8047         break;
8048       }
8049     }
8050   }
8051   return SQLITE_OK;
8052 }
8053 
8054 static int sqlite3Fts5ExprPopulatePoslists(
8055   Fts5Config *pConfig,
8056   Fts5Expr *pExpr,
8057   Fts5PoslistPopulator *aPopulator,
8058   int iCol,
8059   const char *z, int n
8060 ){
8061   int i;
8062   Fts5ExprCtx sCtx;
8063   sCtx.pExpr = pExpr;
8064   sCtx.aPopulator = aPopulator;
8065   sCtx.iOff = (((i64)iCol) << 32) - 1;
8066 
8067   for(i=0; i<pExpr->nPhrase; i++){
8068     Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode;
8069     Fts5Colset *pColset = pNode->pNear->pColset;
8070     if( (pColset && 0==fts5ExprColsetTest(pColset, iCol))
8071      || aPopulator[i].bMiss
8072     ){
8073       aPopulator[i].bOk = 0;
8074     }else{
8075       aPopulator[i].bOk = 1;
8076     }
8077   }
8078 
8079   return sqlite3Fts5Tokenize(pConfig,
8080       FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb
8081   );
8082 }
8083 
8084 static void fts5ExprClearPoslists(Fts5ExprNode *pNode){
8085   if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){
8086     pNode->pNear->apPhrase[0]->poslist.n = 0;
8087   }else{
8088     int i;
8089     for(i=0; i<pNode->nChild; i++){
8090       fts5ExprClearPoslists(pNode->apChild[i]);
8091     }
8092   }
8093 }
8094 
/*
** Evaluate the sub-tree headed by pNode against the position lists
** currently stored in its phrases. Return 1 if the sub-tree matches, or
** 0 otherwise.
**
** Every node visited has its iRowid set to iRowid and its bEof flag
** cleared. When an AND or NOT node fails to match, the position lists of
** all phrases beneath it are cleared. Note that AND and NOT stop at the
** first child that decides the result, so later children are not visited.
*/
static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){
  pNode->iRowid = iRowid;
  pNode->bEof = 0;
  switch( pNode->eType ){
    case FTS5_TERM:
    case FTS5_STRING:
      /* A leaf matches if its first phrase has a non-empty poslist */
      return (pNode->pNear->apPhrase[0]->poslist.n>0);

    case FTS5_AND: {
      int i;
      /* All children must match. Give up at the first one that does not. */
      for(i=0; i<pNode->nChild; i++){
        if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){
          fts5ExprClearPoslists(pNode);
          return 0;
        }
      }
      break;
    }

    case FTS5_OR: {
      int i;
      int bRet = 0;
      /* Visit every child, even after a match has been found */
      for(i=0; i<pNode->nChild; i++){
        if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){
          bRet = 1;
        }
      }
      return bRet;
    }

    default: {
      assert( pNode->eType==FTS5_NOT );
      /* Matches only if the left child matches and the right does not.
      ** The right child is not visited if the left child fails. */
      if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid)
          || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid)
        ){
        fts5ExprClearPoslists(pNode);
        return 0;
      }
      break;
    }
  }
  return 1;
}
8138 
/*
** Run fts5ExprCheckPoslists() on the root node of pExpr with rowid
** iRowid. The match result is discarded.
*/
static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){
  fts5ExprCheckPoslists(pExpr->pRoot, iRowid);
}
8142 
8143 /*
8144 ** This function is only called for detail=columns tables.
8145 */
8146 static int sqlite3Fts5ExprPhraseCollist(
8147   Fts5Expr *pExpr,
8148   int iPhrase,
8149   const u8 **ppCollist,
8150   int *pnCollist
8151 ){
8152   Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase];
8153   Fts5ExprNode *pNode = pPhrase->pNode;
8154   int rc = SQLITE_OK;
8155 
8156   assert( iPhrase>=0 && iPhrase<pExpr->nPhrase );
8157   assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
8158 
8159   if( pNode->bEof==0
8160    && pNode->iRowid==pExpr->pRoot->iRowid
8161    && pPhrase->poslist.n>0
8162   ){
8163     Fts5ExprTerm *pTerm = &pPhrase->aTerm[0];
8164     if( pTerm->pSynonym ){
8165       Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1];
8166       rc = fts5ExprSynonymList(
8167           pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist
8168       );
8169     }else{
8170       *ppCollist = pPhrase->aTerm[0].pIter->pData;
8171       *pnCollist = pPhrase->aTerm[0].pIter->nData;
8172     }
8173   }else{
8174     *ppCollist = 0;
8175     *pnCollist = 0;
8176   }
8177 
8178   return rc;
8179 }
8180 
8181 #line 1 "fts5_hash.c"
8182 /*
8183 ** 2014 August 11
8184 **
8185 ** The author disclaims copyright to this source code.  In place of
8186 ** a legal notice, here is a blessing:
8187 **
8188 **    May you do good and not evil.
8189 **    May you find forgiveness for yourself and forgive others.
8190 **    May you share freely, never taking more than you give.
8191 **
8192 ******************************************************************************
8193 **
8194 */
8195 
8196 
8197 
8198 /* #include "fts5Int.h" */
8199 
typedef struct Fts5HashEntry Fts5HashEntry;

/*
** This file contains the implementation of an in-memory hash table used
** to accumulate "term -> doclist" content before it is flushed to a level-0
** segment.
*/


/*
** The hash table object. aSlot[] holds nSlot chains of entries linked by
** Fts5HashEntry.pHashNext.
*/
struct Fts5Hash {
  int eDetail;                    /* Copy of Fts5Config.eDetail */
  int *pnByte;                    /* Pointer to bytes counter */
  int nEntry;                     /* Number of entries currently in hash */
  int nSlot;                      /* Size of aSlot[] array */
  Fts5HashEntry *pScan;           /* Current ordered scan item */
  Fts5HashEntry **aSlot;          /* Array of hash slots */
};
8217 
8218 /*
8219 ** Each entry in the hash table is represented by an object of the
8220 ** following type. Each object, its key (a nul-terminated string) and
8221 ** its current data are stored in a single memory allocation. The
8222 ** key immediately follows the object in memory. The position list
8223 ** data immediately follows the key data in memory.
8224 **
8225 ** The data that follows the key is in a similar, but not identical format
8226 ** to the doclist data stored in the database. It is:
8227 **
8228 **   * Rowid, as a varint
8229 **   * Position list, without 0x00 terminator.
8230 **   * Size of previous position list and rowid, as a 4 byte
8231 **     big-endian integer.
8232 **
8233 ** iRowidOff:
8234 **   Offset of last rowid written to data area. Relative to first byte of
8235 **   structure.
8236 **
8237 ** nData:
8238 **   Bytes of data written since iRowidOff.
8239 */
8240 struct Fts5HashEntry {
8241   Fts5HashEntry *pHashNext;       /* Next hash entry with same hash-key */
8242   Fts5HashEntry *pScanNext;       /* Next entry in sorted order */
8243 
8244   int nAlloc;                     /* Total size of allocation */
8245   int iSzPoslist;                 /* Offset of space for 4-byte poslist size */
8246   int nData;                      /* Total bytes of data (incl. structure) */
8247   int nKey;                       /* Length of key in bytes */
8248   u8 bDel;                        /* Set delete-flag @ iSzPoslist */
8249   u8 bContent;                    /* Set content-flag (detail=none mode) */
8250   i16 iCol;                       /* Column of last value written */
8251   int iPos;                       /* Position of last value written */
8252   i64 iRowid;                     /* Rowid of last value written */
8253 };
8254 
8255 /*
8256 ** Eqivalent to:
8257 **
8258 **   char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; }
8259 */
8260 #define fts5EntryKey(p) ( ((char *)(&(p)[1])) )
8261 
8262 
8263 /*
8264 ** Allocate a new hash table.
8265 */
8266 static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){
8267   int rc = SQLITE_OK;
8268   Fts5Hash *pNew;
8269 
8270   *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash));
8271   if( pNew==0 ){
8272     rc = SQLITE_NOMEM;
8273   }else{
8274     sqlite3_int64 nByte;
8275     memset(pNew, 0, sizeof(Fts5Hash));
8276     pNew->pnByte = pnByte;
8277     pNew->eDetail = pConfig->eDetail;
8278 
8279     pNew->nSlot = 1024;
8280     nByte = sizeof(Fts5HashEntry*) * pNew->nSlot;
8281     pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64(nByte);
8282     if( pNew->aSlot==0 ){
8283       sqlite3_free(pNew);
8284       *ppNew = 0;
8285       rc = SQLITE_NOMEM;
8286     }else{
8287       memset(pNew->aSlot, 0, (size_t)nByte);
8288     }
8289   }
8290   return rc;
8291 }
8292 
8293 /*
8294 ** Free a hash table object.
8295 */
8296 static void sqlite3Fts5HashFree(Fts5Hash *pHash){
8297   if( pHash ){
8298     sqlite3Fts5HashClear(pHash);
8299     sqlite3_free(pHash->aSlot);
8300     sqlite3_free(pHash);
8301   }
8302 }
8303 
8304 /*
8305 ** Empty (but do not delete) a hash table.
8306 */
8307 static void sqlite3Fts5HashClear(Fts5Hash *pHash){
8308   int i;
8309   for(i=0; i<pHash->nSlot; i++){
8310     Fts5HashEntry *pNext;
8311     Fts5HashEntry *pSlot;
8312     for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){
8313       pNext = pSlot->pHashNext;
8314       sqlite3_free(pSlot);
8315     }
8316   }
8317   memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*));
8318   pHash->nEntry = 0;
8319 }
8320 
8321 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){
8322   int i;
8323   unsigned int h = 13;
8324   for(i=n-1; i>=0; i--){
8325     h = (h << 3) ^ h ^ p[i];
8326   }
8327   return (h % nSlot);
8328 }
8329 
8330 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){
8331   int i;
8332   unsigned int h = 13;
8333   for(i=n-1; i>=0; i--){
8334     h = (h << 3) ^ h ^ p[i];
8335   }
8336   h = (h << 3) ^ h ^ b;
8337   return (h % nSlot);
8338 }
8339 
8340 /*
8341 ** Resize the hash table by doubling the number of slots.
8342 */
8343 static int fts5HashResize(Fts5Hash *pHash){
8344   int nNew = pHash->nSlot*2;
8345   int i;
8346   Fts5HashEntry **apNew;
8347   Fts5HashEntry **apOld = pHash->aSlot;
8348 
8349   apNew = (Fts5HashEntry**)sqlite3_malloc64(nNew*sizeof(Fts5HashEntry*));
8350   if( !apNew ) return SQLITE_NOMEM;
8351   memset(apNew, 0, nNew*sizeof(Fts5HashEntry*));
8352 
8353   for(i=0; i<pHash->nSlot; i++){
8354     while( apOld[i] ){
8355       unsigned int iHash;
8356       Fts5HashEntry *p = apOld[i];
8357       apOld[i] = p->pHashNext;
8358       iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p),
8359                           (int)strlen(fts5EntryKey(p)));
8360       p->pHashNext = apNew[iHash];
8361       apNew[iHash] = p;
8362     }
8363   }
8364 
8365   sqlite3_free(apOld);
8366   pHash->nSlot = nNew;
8367   pHash->aSlot = apNew;
8368   return SQLITE_OK;
8369 }
8370 
/*
** If entry p has a position list size field outstanding (p->iSzPoslist
** is non-zero), fill it in. Return the number of bytes by which the data
** grew as a result, or 0 if there was nothing to do.
**
** If p2 is NULL the data is modified in place within p, and p's
** iSzPoslist, bDel, bContent and nData fields are updated to match.
** If p2 is not NULL, the bytes are written relative to p2 instead, using
** the offsets stored in p, and p itself is left unchanged.
**
** For detail=none tables a 0x00 byte is appended if the delete flag is
** set, followed by a second 0x00 byte if the content flag is also set.
** Otherwise the value (size-in-bytes*2 + delete-flag) is stored at the
** reserved offset. A single byte is reserved for it, so if the value
** needs a longer varint the position list is first moved up to make room.
*/
static int fts5HashAddPoslistSize(
  Fts5Hash *pHash,
  Fts5HashEntry *p,
  Fts5HashEntry *p2
){
  int nRet = 0;
  if( p->iSzPoslist ){
    u8 *pPtr = p2 ? (u8*)p2 : (u8*)p;
    int nData = p->nData;
    if( pHash->eDetail==FTS5_DETAIL_NONE ){
      assert( nData==p->iSzPoslist );
      if( p->bDel ){
        pPtr[nData++] = 0x00;
        if( p->bContent ){
          pPtr[nData++] = 0x00;
        }
      }
    }else{
      int nSz = (nData - p->iSzPoslist - 1);       /* Size in bytes */
      int nPos = nSz*2 + p->bDel;                     /* Value of nPos field */

      assert( p->bDel==0 || p->bDel==1 );
      if( nPos<=127 ){
        /* Fits in the single byte that was reserved */
        pPtr[p->iSzPoslist] = (u8)nPos;
      }else{
        /* Shift the position list up by (nByte-1) bytes to make room */
        int nByte = sqlite3Fts5GetVarintLen((u32)nPos);
        memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz);
        sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos);
        nData += (nByte-1);
      }
    }

    nRet = nData - p->nData;
    if( p2==0 ){
      p->iSzPoslist = 0;
      p->bDel = 0;
      p->bContent = 0;
      p->nData = nData;
    }
  }
  return nRet;
}
8413 
8414 /*
8415 ** Add an entry to the in-memory hash table. The key is the concatenation
8416 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos).
8417 **
8418 **     (bByte || pToken) -> (iRowid,iCol,iPos)
8419 **
8420 ** Or, if iCol is negative, then the value is a delete marker.
8421 */
8422 static int sqlite3Fts5HashWrite(
8423   Fts5Hash *pHash,
8424   i64 iRowid,                     /* Rowid for this entry */
8425   int iCol,                       /* Column token appears in (-ve -> delete) */
8426   int iPos,                       /* Position of token within column */
8427   char bByte,                     /* First byte of token */
8428   const char *pToken, int nToken  /* Token to add or remove to or from index */
8429 ){
8430   unsigned int iHash;
8431   Fts5HashEntry *p;
8432   u8 *pPtr;
8433   int nIncr = 0;                  /* Amount to increment (*pHash->pnByte) by */
8434   int bNew;                       /* If non-delete entry should be written */
8435 
8436   bNew = (pHash->eDetail==FTS5_DETAIL_FULL);
8437 
8438   /* Attempt to locate an existing hash entry */
8439   iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
8440   for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
8441     char *zKey = fts5EntryKey(p);
8442     if( zKey[0]==bByte
8443      && p->nKey==nToken
8444      && memcmp(&zKey[1], pToken, nToken)==0
8445     ){
8446       break;
8447     }
8448   }
8449 
8450   /* If an existing hash entry cannot be found, create a new one. */
8451   if( p==0 ){
8452     /* Figure out how much space to allocate */
8453     char *zKey;
8454     sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64;
8455     if( nByte<128 ) nByte = 128;
8456 
8457     /* Grow the Fts5Hash.aSlot[] array if necessary. */
8458     if( (pHash->nEntry*2)>=pHash->nSlot ){
8459       int rc = fts5HashResize(pHash);
8460       if( rc!=SQLITE_OK ) return rc;
8461       iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken);
8462     }
8463 
8464     /* Allocate new Fts5HashEntry and add it to the hash table. */
8465     p = (Fts5HashEntry*)sqlite3_malloc64(nByte);
8466     if( !p ) return SQLITE_NOMEM;
8467     memset(p, 0, sizeof(Fts5HashEntry));
8468     p->nAlloc = (int)nByte;
8469     zKey = fts5EntryKey(p);
8470     zKey[0] = bByte;
8471     memcpy(&zKey[1], pToken, nToken);
8472     assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) );
8473     p->nKey = nToken;
8474     zKey[nToken+1] = '\0';
8475     p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry);
8476     p->pHashNext = pHash->aSlot[iHash];
8477     pHash->aSlot[iHash] = p;
8478     pHash->nEntry++;
8479 
8480     /* Add the first rowid field to the hash-entry */
8481     p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid);
8482     p->iRowid = iRowid;
8483 
8484     p->iSzPoslist = p->nData;
8485     if( pHash->eDetail!=FTS5_DETAIL_NONE ){
8486       p->nData += 1;
8487       p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
8488     }
8489 
8490   }else{
8491 
8492     /* Appending to an existing hash-entry. Check that there is enough
8493     ** space to append the largest possible new entry. Worst case scenario
8494     ** is:
8495     **
8496     **     + 9 bytes for a new rowid,
8497     **     + 4 byte reserved for the "poslist size" varint.
8498     **     + 1 byte for a "new column" byte,
8499     **     + 3 bytes for a new column number (16-bit max) as a varint,
8500     **     + 5 bytes for the new position offset (32-bit max).
8501     */
8502     if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){
8503       sqlite3_int64 nNew = p->nAlloc * 2;
8504       Fts5HashEntry *pNew;
8505       Fts5HashEntry **pp;
8506       pNew = (Fts5HashEntry*)sqlite3_realloc64(p, nNew);
8507       if( pNew==0 ) return SQLITE_NOMEM;
8508       pNew->nAlloc = (int)nNew;
8509       for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext);
8510       *pp = pNew;
8511       p = pNew;
8512     }
8513     nIncr -= p->nData;
8514   }
8515   assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) );
8516 
8517   pPtr = (u8*)p;
8518 
8519   /* If this is a new rowid, append the 4-byte size field for the previous
8520   ** entry, and the new rowid for this entry.  */
8521   if( iRowid!=p->iRowid ){
8522     u64 iDiff = (u64)iRowid - (u64)p->iRowid;
8523     fts5HashAddPoslistSize(pHash, p, 0);
8524     p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff);
8525     p->iRowid = iRowid;
8526     bNew = 1;
8527     p->iSzPoslist = p->nData;
8528     if( pHash->eDetail!=FTS5_DETAIL_NONE ){
8529       p->nData += 1;
8530       p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1);
8531       p->iPos = 0;
8532     }
8533   }
8534 
8535   if( iCol>=0 ){
8536     if( pHash->eDetail==FTS5_DETAIL_NONE ){
8537       p->bContent = 1;
8538     }else{
8539       /* Append a new column value, if necessary */
8540       assert_nc( iCol>=p->iCol );
8541       if( iCol!=p->iCol ){
8542         if( pHash->eDetail==FTS5_DETAIL_FULL ){
8543           pPtr[p->nData++] = 0x01;
8544           p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol);
8545           p->iCol = (i16)iCol;
8546           p->iPos = 0;
8547         }else{
8548           bNew = 1;
8549           p->iCol = (i16)(iPos = iCol);
8550         }
8551       }
8552 
8553       /* Append the new position offset, if necessary */
8554       if( bNew ){
8555         p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2);
8556         p->iPos = iPos;
8557       }
8558     }
8559   }else{
8560     /* This is a delete. Set the delete flag. */
8561     p->bDel = 1;
8562   }
8563 
8564   nIncr += p->nData;
8565   *pHash->pnByte += nIncr;
8566   return SQLITE_OK;
8567 }
8568 
8569 
8570 /*
8571 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects,
8572 ** each sorted in key order. This function merges the two lists into a
8573 ** single list and returns a pointer to its first element.
8574 */
8575 static Fts5HashEntry *fts5HashEntryMerge(
8576   Fts5HashEntry *pLeft,
8577   Fts5HashEntry *pRight
8578 ){
8579   Fts5HashEntry *p1 = pLeft;
8580   Fts5HashEntry *p2 = pRight;
8581   Fts5HashEntry *pRet = 0;
8582   Fts5HashEntry **ppOut = &pRet;
8583 
8584   while( p1 || p2 ){
8585     if( p1==0 ){
8586       *ppOut = p2;
8587       p2 = 0;
8588     }else if( p2==0 ){
8589       *ppOut = p1;
8590       p1 = 0;
8591     }else{
8592       int i = 0;
8593       char *zKey1 = fts5EntryKey(p1);
8594       char *zKey2 = fts5EntryKey(p2);
8595       while( zKey1[i]==zKey2[i] ) i++;
8596 
8597       if( ((u8)zKey1[i])>((u8)zKey2[i]) ){
8598         /* p2 is smaller */
8599         *ppOut = p2;
8600         ppOut = &p2->pScanNext;
8601         p2 = p2->pScanNext;
8602       }else{
8603         /* p1 is smaller */
8604         *ppOut = p1;
8605         ppOut = &p1->pScanNext;
8606         p1 = p1->pScanNext;
8607       }
8608       *ppOut = 0;
8609     }
8610   }
8611 
8612   return pRet;
8613 }
8614 
8615 /*
8616 ** Extract all tokens from hash table iHash and link them into a list
8617 ** in sorted order. The hash table is cleared before returning. It is
8618 ** the responsibility of the caller to free the elements of the returned
8619 ** list.
8620 */
8621 static int fts5HashEntrySort(
8622   Fts5Hash *pHash,
8623   const char *pTerm, int nTerm,   /* Query prefix, if any */
8624   Fts5HashEntry **ppSorted
8625 ){
8626   const int nMergeSlot = 32;
8627   Fts5HashEntry **ap;
8628   Fts5HashEntry *pList;
8629   int iSlot;
8630   int i;
8631 
8632   *ppSorted = 0;
8633   ap = sqlite3_malloc64(sizeof(Fts5HashEntry*) * nMergeSlot);
8634   if( !ap ) return SQLITE_NOMEM;
8635   memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot);
8636 
8637   for(iSlot=0; iSlot<pHash->nSlot; iSlot++){
8638     Fts5HashEntry *pIter;
8639     for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){
8640       if( pTerm==0
8641        || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm))
8642       ){
8643         Fts5HashEntry *pEntry = pIter;
8644         pEntry->pScanNext = 0;
8645         for(i=0; ap[i]; i++){
8646           pEntry = fts5HashEntryMerge(pEntry, ap[i]);
8647           ap[i] = 0;
8648         }
8649         ap[i] = pEntry;
8650       }
8651     }
8652   }
8653 
8654   pList = 0;
8655   for(i=0; i<nMergeSlot; i++){
8656     pList = fts5HashEntryMerge(pList, ap[i]);
8657   }
8658 
8659   pHash->nEntry = 0;
8660   sqlite3_free(ap);
8661   *ppSorted = pList;
8662   return SQLITE_OK;
8663 }
8664 
8665 /*
8666 ** Query the hash table for a doclist associated with term pTerm/nTerm.
8667 */
8668 static int sqlite3Fts5HashQuery(
8669   Fts5Hash *pHash,                /* Hash table to query */
8670   int nPre,
8671   const char *pTerm, int nTerm,   /* Query term */
8672   void **ppOut,                   /* OUT: Pointer to new object */
8673   int *pnDoclist                  /* OUT: Size of doclist in bytes */
8674 ){
8675   unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm);
8676   char *zKey = 0;
8677   Fts5HashEntry *p;
8678 
8679   for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){
8680     zKey = fts5EntryKey(p);
8681     assert( p->nKey+1==(int)strlen(zKey) );
8682     if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break;
8683   }
8684 
8685   if( p ){
8686     int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1;
8687     int nList = p->nData - nHashPre;
8688     u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10));
8689     if( pRet ){
8690       Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre];
8691       memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList);
8692       nList += fts5HashAddPoslistSize(pHash, p, pFaux);
8693       *pnDoclist = nList;
8694     }else{
8695       *pnDoclist = 0;
8696       return SQLITE_NOMEM;
8697     }
8698   }else{
8699     *ppOut = 0;
8700     *pnDoclist = 0;
8701   }
8702 
8703   return SQLITE_OK;
8704 }
8705 
/*
** Begin an ordered scan of the hash table entries whose keys begin with
** pTerm/nTerm. The sorted list built by fts5HashEntrySort() is stored in
** p->pScan. Returns the value returned by fts5HashEntrySort().
*/
static int sqlite3Fts5HashScanInit(
  Fts5Hash *p,                    /* Hash table to query */
  const char *pTerm, int nTerm    /* Query prefix */
){
  return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan);
}
8712 
/*
** Advance the current scan to the next entry in sorted order. The scan
** must not already be at EOF.
*/
static void sqlite3Fts5HashScanNext(Fts5Hash *p){
  assert( !sqlite3Fts5HashScanEof(p) );
  p->pScan = p->pScan->pScanNext;
}
8717 
/*
** Return true if the current scan has no current entry (p->pScan is
** NULL), or false otherwise.
*/
static int sqlite3Fts5HashScanEof(Fts5Hash *p){
  return (p->pScan==0);
}
8721 
8722 static void sqlite3Fts5HashScanEntry(
8723   Fts5Hash *pHash,
8724   const char **pzTerm,            /* OUT: term (nul-terminated) */
8725   const u8 **ppDoclist,           /* OUT: pointer to doclist */
8726   int *pnDoclist                  /* OUT: size of doclist in bytes */
8727 ){
8728   Fts5HashEntry *p;
8729   if( (p = pHash->pScan) ){
8730     char *zKey = fts5EntryKey(p);
8731     int nTerm = (int)strlen(zKey);
8732     fts5HashAddPoslistSize(pHash, p, 0);
8733     *pzTerm = zKey;
8734     *ppDoclist = (const u8*)&zKey[nTerm+1];
8735     *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1);
8736   }else{
8737     *pzTerm = 0;
8738     *ppDoclist = 0;
8739     *pnDoclist = 0;
8740   }
8741 }
8742 
8743 #line 1 "fts5_index.c"
8744 /*
8745 ** 2014 May 31
8746 **
8747 ** The author disclaims copyright to this source code.  In place of
8748 ** a legal notice, here is a blessing:
8749 **
8750 **    May you do good and not evil.
8751 **    May you find forgiveness for yourself and forgive others.
8752 **    May you share freely, never taking more than you give.
8753 **
8754 ******************************************************************************
8755 **
8756 ** Low level access to the FTS index stored in the database file. The
** routines in this file implement all read and write access to the
8758 ** %_data table. Other parts of the system access this functionality via
8759 ** the interface defined in fts5Int.h.
8760 */
8761 
8762 
8763 /* #include "fts5Int.h" */
8764 
8765 /*
8766 ** Overview:
8767 **
8768 ** The %_data table contains all the FTS indexes for an FTS5 virtual table.
8769 ** As well as the main term index, there may be up to 31 prefix indexes.
8770 ** The format is similar to FTS3/4, except that:
8771 **
8772 **   * all segment b-tree leaf data is stored in fixed size page records
8773 **     (e.g. 1000 bytes). A single doclist may span multiple pages. Care is
8774 **     taken to ensure it is possible to iterate in either direction through
8775 **     the entries in a doclist, or to seek to a specific entry within a
8776 **     doclist, without loading it into memory.
8777 **
8778 **   * large doclists that span many pages have associated "doclist index"
8779 **     records that contain a copy of the first rowid on each page spanned by
8780 **     the doclist. This is used to speed up seek operations, and merges of
8781 **     large doclists with very small doclists.
8782 **
8783 **   * extra fields in the "structure record" record the state of ongoing
8784 **     incremental merge operations.
8785 **
8786 */
8787 
8788 
#define FTS5_OPT_WORK_UNIT  1000  /* Number of leaf pages per optimize step */
#define FTS5_WORK_UNIT      64    /* Number of leaf pages in unit of work */

#define FTS5_MIN_DLIDX_SIZE 4     /* Add dlidx if this many empty pages */

/* NOTE(review): presumably the one-byte tag that identifies terms belonging
** to the main (non-prefix) index - confirm against the users of this macro. */
#define FTS5_MAIN_PREFIX '0'

/* The overview comment above allows for at most 31 prefix indexes. Refuse
** to build if the configured maximum exceeds that. */
#if FTS5_MAX_PREFIX_INDEXES > 31
# error "FTS5_MAX_PREFIX_INDEXES is too large"
#endif
8799 
8800 /*
8801 ** Details:
8802 **
8803 ** The %_data table managed by this module,
8804 **
8805 **     CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB);
8806 **
8807 ** , contains the following 5 types of records. See the comments surrounding
8808 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are
** assigned to each of them.
8810 **
8811 ** 1. Structure Records:
8812 **
8813 **   The set of segments that make up an index - the index structure - are
8814 **   recorded in a single record within the %_data table. The record consists
8815 **   of a single 32-bit configuration cookie value followed by a list of
8816 **   SQLite varints. If the FTS table features more than one index (because
8817 **   there are one or more prefix indexes), it is guaranteed that all share
8818 **   the same cookie value.
8819 **
8820 **   Immediately following the configuration cookie, the record begins with
8821 **   three varints:
8822 **
8823 **     + number of levels,
8824 **     + total number of segments on all levels,
8825 **     + value of write counter.
8826 **
8827 **   Then, for each level from 0 to nMax:
8828 **
8829 **     + number of input segments in ongoing merge.
8830 **     + total number of segments in level.
8831 **     + for each segment from oldest to newest:
8832 **         + segment id (always > 0)
8833 **         + first leaf page number (often 1, always greater than 0)
8834 **         + final leaf page number
8835 **
8836 ** 2. The Averages Record:
8837 **
8838 **   A single record within the %_data table. The data is a list of varints.
8839 **   The first value is the number of rows in the index. Then, for each column
8840 **   from left to right, the total number of tokens in the column for all
8841 **   rows of the table.
8842 **
8843 ** 3. Segment leaves:
8844 **
8845 **   TERM/DOCLIST FORMAT:
8846 **
8847 **     Most of each segment leaf is taken up by term/doclist data. The
8848 **     general format of term/doclist, starting with the first term
8849 **     on the leaf page, is:
8850 **
8851 **         varint : size of first term
8852 **         blob:    first term data
8853 **         doclist: first doclist
8854 **         zero-or-more {
8855 **           varint:  number of bytes in common with previous term
8856 **           varint:  number of bytes of new term data (nNew)
8857 **           blob:    nNew bytes of new term data
8858 **           doclist: next doclist
8859 **         }
8860 **
8861 **     doclist format:
8862 **
8863 **         varint:  first rowid
8864 **         poslist: first poslist
8865 **         zero-or-more {
8866 **           varint:  rowid delta (always > 0)
8867 **           poslist: next poslist
8868 **         }
8869 **
8870 **     poslist format:
8871 **
8872 **         varint: size of poslist in bytes multiplied by 2, not including
8873 **                 this field. Plus 1 if this entry carries the "delete" flag.
8874 **         collist: collist for column 0
8875 **         zero-or-more {
8876 **           0x01 byte
8877 **           varint: column number (I)
8878 **           collist: collist for column I
8879 **         }
8880 **
8881 **     collist format:
8882 **
8883 **         varint: first offset + 2
8884 **         zero-or-more {
8885 **           varint: offset delta + 2
8886 **         }
8887 **
8888 **   PAGE FORMAT
8889 **
8890 **     Each leaf page begins with a 4-byte header containing 2 16-bit
8891 **     unsigned integer fields in big-endian format. They are:
8892 **
8893 **       * The byte offset of the first rowid on the page, if it exists
8894 **         and occurs before the first term (otherwise 0).
8895 **
8896 **       * The byte offset of the start of the page footer. If the page
8897 **         footer is 0 bytes in size, then this field is the same as the
8898 **         size of the leaf page in bytes.
8899 **
8900 **     The page footer consists of a single varint for each term located
8901 **     on the page. Each varint is the byte offset of the current term
8902 **     within the page, delta-compressed against the previous value. In
8903 **     other words, the first varint in the footer is the byte offset of
8904 **     the first term, the second is the byte offset of the second less that
8905 **     of the first, and so on.
8906 **
8907 **     The term/doclist format described above is accurate if the entire
8908 **     term/doclist data fits on a single leaf page. If this is not the case,
8909 **     the format is changed in two ways:
8910 **
8911 **       + if the first rowid on a page occurs before the first term, it
8912 **         is stored as a literal value:
8913 **
8914 **             varint:  first rowid
8915 **
8916 **       + the first term on each page is stored in the same way as the
8917 **         very first term of the segment:
8918 **
8919 **             varint : size of first term
8920 **             blob:    first term data
8921 **
8922 ** 5. Segment doclist indexes:
8923 **
8924 **   Doclist indexes are themselves b-trees, however they usually consist of
8925 **   a single leaf record only. The format of each doclist index leaf page
8926 **   is:
8927 **
8928 **     * Flags byte. Bits are:
8929 **         0x01: Clear if leaf is also the root page, otherwise set.
8930 **
8931 **     * Page number of fts index leaf page. As a varint.
8932 **
8933 **     * First rowid on page indicated by previous field. As a varint.
8934 **
8935 **     * A list of varints, one for each subsequent termless page. A
8936 **       positive delta if the termless page contains at least one rowid,
8937 **       or an 0x00 byte otherwise.
8938 **
8939 **   Internal doclist index nodes are:
8940 **
8941 **     * Flags byte. Bits are:
8942 **         0x01: Clear for root page, otherwise set.
8943 **
8944 **     * Page number of first child page. As a varint.
8945 **
8946 **     * Copy of first rowid on page indicated by previous field. As a varint.
8947 **
8948 **     * A list of delta-encoded varints - the first rowid on each subsequent
8949 **       child page.
8950 **
8951 */
8952 
8953 /*
8954 ** Rowids for the averages and structure records in the %_data table.
8955 */
8956 #define FTS5_AVERAGES_ROWID     1    /* Rowid used for the averages record */
8957 #define FTS5_STRUCTURE_ROWID   10    /* The structure record */
8958 
8959 /*
8960 ** Macros determining the rowids used by segment leaves and dlidx leaves
8961 ** and nodes. All nodes and leaves are stored in the %_data table with large
8962 ** positive rowids.
8963 **
8964 ** Each segment has a unique non-zero 16-bit id.
8965 **
8966 ** The rowid for each segment leaf is found by passing the segment id and
8967 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered
8968 ** sequentially starting from 1.
8969 */
8970 #define FTS5_DATA_ID_B     16     /* Max seg id number 65535 */
8971 #define FTS5_DATA_DLI_B     1     /* Doclist-index flag (1 bit) */
8972 #define FTS5_DATA_HEIGHT_B  5     /* Max dlidx tree height of 32 */
8973 #define FTS5_DATA_PAGE_B   31     /* Max page number of 2147483648 */
8974 
8975 #define fts5_dri(segid, dlidx, height, pgno) (                                 \
8976  ((i64)(segid)  << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) +    \
8977  ((i64)(dlidx)  << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) +                  \
8978  ((i64)(height) << (FTS5_DATA_PAGE_B)) +                                       \
8979  ((i64)(pgno))                                                                 \
8980 )
8981 
8982 #define FTS5_SEGMENT_ROWID(segid, pgno)       fts5_dri(segid, 0, 0, pgno)
8983 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno)
8984 
#ifdef SQLITE_DEBUG
/*
** Debug-build helper that simply returns SQLITE_CORRUPT_VTAB. Being a real
** function (rather than a bare constant) gives a single place to set a
** breakpoint when corruption is reported.
**
** The parameter list is spelled (void) so that this is a proper prototype;
** an empty "()" declares a function with unspecified parameters in C.
*/
static int sqlite3Fts5Corrupt(void) { return SQLITE_CORRUPT_VTAB; }
#endif
8988 
8989 
8990 /*
8991 ** Each time a blob is read from the %_data table, it is padded with this
8992 ** many zero bytes. This makes it easier to decode the various record formats
8993 ** without overreading if the records are corrupt.
8994 */
8995 #define FTS5_DATA_ZERO_PADDING 8
8996 #define FTS5_DATA_PADDING 20
8997 
/* Forward declarations for the object types used in this file. */
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;
9010 
/*
** In-memory copy of one record read from the %_data table.
**
** fts5DataRead() allocates the structure and the record buffer as a single
** block, with p pointing at the bytes immediately following the structure,
** so one sqlite3_free() (see fts5DataRelease()) releases both.
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index */
                                  /* (fts5DataRead() loads it from the */
                                  /* big-endian u16 at p[2]) */
};
9016 
9017 /*
9018 ** One object per %_data table.
9019 */
9020 struct Fts5Index {
9021   Fts5Config *pConfig;            /* Virtual table configuration */
9022   char *zDataTbl;                 /* Name of %_data table */
9023   int nWorkUnit;                  /* Leaf pages in a "unit" of work */
9024 
9025   /*
9026   ** Variables related to the accumulation of tokens and doclists within the
9027   ** in-memory hash tables before they are flushed to disk.
9028   */
9029   Fts5Hash *pHash;                /* Hash table for in-memory data */
9030   int nPendingData;               /* Current bytes of pending data */
9031   i64 iWriteRowid;                /* Rowid for current doc being written */
9032   int bDelete;                    /* Current write is a delete */
9033 
9034   /* Error state. */
9035   int rc;                         /* Current error code */
9036 
9037   /* State used by the fts5DataXXX() functions. */
9038   sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
9039   sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
9040   sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
9041   sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
9042   sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
9043   sqlite3_stmt *pIdxSelect;
9044   int nRead;                      /* Total number of blocks read */
9045 
9046   sqlite3_stmt *pDataVersion;
9047   i64 iStructVersion;             /* data_version when pStruct read */
9048   Fts5Structure *pStruct;         /* Current db structure (or NULL) */
9049 };
9050 
/*
** Iterator over the entries of a doclist held in a memory buffer.
** NOTE(review): the field notes marked "presumably" are inferred from the
** field names only - confirm against the code that uses this type.
*/
struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;                     /* Presumably rowid of the current entry */
  u8 *aPoslist;                   /* Presumably the current position list */
  int nPoslist;                   /* Presumably size of aPoslist in bytes */
  int nSize;                      /* Presumably size of the poslist-size */
                                  /* field - TODO confirm */
};
9060 
9061 /*
9062 ** The contents of the "structure" record for each index are represented
9063 ** using an Fts5Structure record in memory. Which uses instances of the
9064 ** other Fts5StructureXXX types as components.
9065 */
9066 struct Fts5StructureSegment {
9067   int iSegid;                     /* Segment id */
9068   int pgnoFirst;                  /* First leaf page number in segment */
9069   int pgnoLast;                   /* Last leaf page number in segment */
9070 };
9071 struct Fts5StructureLevel {
9072   int nMerge;                     /* Number of segments in incr-merge */
9073   int nSeg;                       /* Total number of segments on level */
9074   Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
9075 };
9076 struct Fts5Structure {
9077   int nRef;                       /* Object reference count */
9078   u64 nWriteCounter;              /* Total leaves written to level 0 */
9079   int nSegment;                   /* Total segments in this structure */
9080   int nLevel;                     /* Number of levels in this index */
9081   Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
9082 };
9083 
9084 /*
9085 ** An object of type Fts5SegWriter is used to write to segments.
9086 */
9087 struct Fts5PageWriter {
9088   int pgno;                       /* Page number for this page */
9089   int iPrevPgidx;                 /* Previous value written into pgidx */
9090   Fts5Buffer buf;                 /* Buffer containing leaf data */
9091   Fts5Buffer pgidx;               /* Buffer containing page-index */
9092   Fts5Buffer term;                /* Buffer containing previous term on page */
9093 };
9094 struct Fts5DlidxWriter {
9095   int pgno;                       /* Page number for this page */
9096   int bPrevValid;                 /* True if iPrev is valid */
9097   i64 iPrev;                      /* Previous rowid value written to page */
9098   Fts5Buffer buf;                 /* Buffer containing page data */
9099 };
9100 struct Fts5SegWriter {
9101   int iSegid;                     /* Segid to write to */
9102   Fts5PageWriter writer;          /* PageWriter object */
9103   i64 iPrevRowid;                 /* Previous rowid written to current leaf */
9104   u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
9105   u8 bFirstRowidInPage;           /* True if next rowid is first in page */
9106   /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
9107   u8 bFirstTermInPage;            /* True if next term will be first in leaf */
9108   int nLeafWritten;               /* Number of leaf pages written */
9109   int nEmpty;                     /* Number of contiguous term-less nodes */
9110 
9111   int nDlidx;                     /* Allocated size of aDlidx[] array */
9112   Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */
9113 
9114   /* Values to insert into the %_idx table */
9115   Fts5Buffer btterm;              /* Next term to insert into %_idx table */
9116   int iBtPage;                    /* Page number corresponding to btterm */
9117 };
9118 
/*
** One entry of the Fts5Iter.aFirst[] array: the outcome of comparing two
** segment iterators (or two earlier comparison results).
*/
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};
9124 
9125 /*
9126 ** Object for iterating through a single segment, visiting each term/rowid
9127 ** pair in the segment.
9128 **
9129 ** pSeg:
9130 **   The segment to iterate through.
9131 **
9132 ** iLeafPgno:
9133 **   Current leaf page number within segment.
9134 **
9135 ** iLeafOffset:
9136 **   Byte offset within the current leaf that is the first byte of the
**   position list data (one byte past the position-list size field).
9138 **   rowid field of the current entry. Usually this is the size field of the
9139 **   position list data. The exception is if the rowid for the current entry
9140 **   is the last thing on the leaf page.
9141 **
9142 ** pLeaf:
9143 **   Buffer containing current leaf page data. Set to NULL at EOF.
9144 **
9145 ** iTermLeafPgno, iTermLeafOffset:
9146 **   Leaf page number containing the last term read from the segment. And
9147 **   the offset immediately following the term data.
9148 **
9149 ** flags:
9150 **   Mask of FTS5_SEGITER_XXX values. Interpreted as follows:
9151 **
9152 **   FTS5_SEGITER_ONETERM:
9153 **     If set, set the iterator to point to EOF after the current doclist
9154 **     has been exhausted. Do not proceed to the next term in the segment.
9155 **
9156 **   FTS5_SEGITER_REVERSE:
9157 **     This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If
9158 **     it is set, iterate through rowid in descending order instead of the
9159 **     default ascending order.
9160 **
9161 ** iRowidOffset/nRowidOffset/aRowidOffset:
9162 **     These are used if the FTS5_SEGITER_REVERSE flag is set.
9163 **
9164 **     For each rowid on the page corresponding to the current term, the
9165 **     corresponding aRowidOffset[] entry is set to the byte offset of the
9166 **     start of the "position-list-size" field within the page.
9167 **
9168 ** iTermIdx:
9169 **     Index of current term on iTermLeafPgno.
9170 */
/* State of an iterator over a single segment. The individual fields are
** described in the block comment preceding this definition. */
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  i64 iLeafOffset;                /* Byte offset within current leaf */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;              /* NOTE(review): presumably the offset in */
                                  /* pLeaf at which the current doclist */
                                  /* ends - confirm */

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};
9203 
9204 /*
9205 ** Argument is a pointer to an Fts5Data structure that contains a
9206 ** leaf page.
9207 */
9208 #define ASSERT_SZLEAF_OK(x) assert( \
9209     (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
9210 )
9211 
9212 #define FTS5_SEGITER_ONETERM 0x01
9213 #define FTS5_SEGITER_REVERSE 0x02
9214 
9215 /*
9216 ** Argument is a pointer to an Fts5Data structure that contains a leaf
9217 ** page. This macro evaluates to true if the leaf contains no terms, or
9218 ** false if it contains at least one term.
9219 */
9220 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn)
9221 
9222 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2]))
9223 
9224 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p))
9225 
9226 /*
9227 ** Object for iterating through the merged results of one or more segments,
9228 ** visiting each term/rowid pair in the merged data.
9229 **
9230 ** nSeg is always a power of two greater than or equal to the number of
9231 ** segments that this object is merging data from. Both the aSeg[] and
9232 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded
9233 ** with zeroed objects - these are handled as if they were iterators opened
9234 ** on empty segments.
9235 **
9236 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an
9237 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the
9238 ** comparison in this context is the index of the iterator that currently
9239 ** points to the smaller term/rowid combination. Iterators at EOF are
9240 ** considered to be greater than all other iterators.
9241 **
9242 ** aFirst[1] contains the index in aSeg[] of the iterator that points to
9243 ** the smallest key overall. aFirst[0] is unused.
9244 **
9245 ** poslist:
9246 **   Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered.
9247 **   There is no way to tell if this is populated or not.
9248 */
/* Iterator that merges the output of one or more Fts5SegIter objects. See
** the block comment preceding this definition for how aSeg[] and aFirst[]
** are organised. */
struct Fts5Iter {
  Fts5IndexIter base;             /* Base class containing output vars */

  Fts5Index *pIndex;              /* Index that owns this iterator */
  Fts5Buffer poslist;             /* Buffer containing current poslist */
  Fts5Colset *pColset;            /* Restrict matches to these columns */

  /* Invoked to set output variables. */
  void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*);

  int nSeg;                       /* Size of aSeg[] array */
  int bRev;                       /* True to iterate in reverse order */
  u8 bSkipEmpty;                  /* True to skip deleted entries */

  i64 iSwitchRowid;               /* Firstest rowid of other than aFirst[1] */
  Fts5CResult *aFirst;            /* Current merge state (see above) */
  /* Declared with one element; per the comment above the array is sized at
  ** nSeg entries, so this must remain the final member. */
  Fts5SegIter aSeg[1];            /* Array of segment iterators */
};
9267 
9268 
9269 /*
9270 ** An instance of the following type is used to iterate through the contents
9271 ** of a doclist-index record.
9272 **
9273 ** pData:
9274 **   Record containing the doclist-index data.
9275 **
9276 ** bEof:
9277 **   Set to true once iterator has reached EOF.
9278 **
9279 ** iOff:
9280 **   Set to the current offset within record pData.
9281 */
/* State for one level of a doclist-index iterator. */
struct Fts5DlidxLvl {
  Fts5Data *pData;              /* Data for current page of this level */
  int iOff;                     /* Current offset into pData */
  int bEof;                     /* At EOF already */
  int iFirstOff;                /* Used by reverse iterators */

  /* Output variables */
  int iLeafPgno;                /* Page number of current leaf page */
  i64 iRowid;                   /* First rowid on leaf iLeafPgno */
};
/* A doclist-index iterator: a header followed by an array of levels.
** NOTE(review): the field notes below are inferred from the names only -
** confirm against the code that allocates and walks this object. */
struct Fts5DlidxIter {
  int nLvl;                     /* Presumably the number of aLvl[] entries */
  int iSegid;                   /* Presumably the id of the owning segment */
  Fts5DlidxLvl aLvl[1];         /* Declared with 1 element; final member */
};
9297 
9298 static void fts5PutU16(u8 *aOut, u16 iVal){
9299   aOut[0] = (iVal>>8);
9300   aOut[1] = (iVal&0xFF);
9301 }
9302 
9303 static u16 fts5GetU16(const u8 *aIn){
9304   return ((u16)aIn[0] << 8) + aIn[1];
9305 }
9306 
9307 /*
9308 ** Allocate and return a buffer at least nByte bytes in size.
9309 **
9310 ** If an OOM error is encountered, return NULL and set the error code in
9311 ** the Fts5Index handle passed as the first argument.
9312 */
9313 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){
9314   return sqlite3Fts5MallocZero(&p->rc, nByte);
9315 }
9316 
9317 /*
9318 ** Compare the contents of the pLeft buffer with the pRight/nRight blob.
9319 **
9320 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
9321 ** +ve if pRight is smaller than pLeft. In other words:
9322 **
9323 **     res = *pLeft - *pRight
9324 */
9325 #ifdef SQLITE_DEBUG
9326 static int fts5BufferCompareBlob(
9327   Fts5Buffer *pLeft,              /* Left hand side of comparison */
9328   const u8 *pRight, int nRight    /* Right hand side of comparison */
9329 ){
9330   int nCmp = MIN(pLeft->n, nRight);
9331   int res = memcmp(pLeft->p, pRight, nCmp);
9332   return (res==0 ? (pLeft->n - nRight) : res);
9333 }
9334 #endif
9335 
9336 /*
9337 ** Compare the contents of the two buffers using memcmp(). If one buffer
9338 ** is a prefix of the other, it is considered the lesser.
9339 **
9340 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or
9341 ** +ve if pRight is smaller than pLeft. In other words:
9342 **
9343 **     res = *pLeft - *pRight
9344 */
9345 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){
9346   int nCmp, res;
9347   nCmp = MIN(pLeft->n, pRight->n);
9348   assert( nCmp<=0 || pLeft->p!=0 );
9349   assert( nCmp<=0 || pRight->p!=0 );
9350   res = fts5Memcmp(pLeft->p, pRight->p, nCmp);
9351   return (res==0 ? (pLeft->n - pRight->n) : res);
9352 }
9353 
9354 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){
9355   int ret;
9356   fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret);
9357   return ret;
9358 }
9359 
9360 /*
9361 ** Close the read-only blob handle, if it is open.
9362 */
9363 static void sqlite3Fts5IndexCloseReader(Fts5Index *p){
9364   if( p->pReader ){
9365     sqlite3_blob *pReader = p->pReader;
9366     p->pReader = 0;
9367     sqlite3_blob_close(pReader);
9368   }
9369 }
9370 
9371 /*
9372 ** Retrieve a record from the %_data table.
9373 **
9374 ** If an error occurs, NULL is returned and an error left in the
9375 ** Fts5Index object.
9376 */
9377 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){
9378   Fts5Data *pRet = 0;
9379   if( p->rc==SQLITE_OK ){
9380     int rc = SQLITE_OK;
9381 
9382     if( p->pReader ){
9383       /* This call may return SQLITE_ABORT if there has been a savepoint
9384       ** rollback since it was last used. In this case a new blob handle
9385       ** is required.  */
9386       sqlite3_blob *pBlob = p->pReader;
9387       p->pReader = 0;
9388       rc = sqlite3_blob_reopen(pBlob, iRowid);
9389       assert( p->pReader==0 );
9390       p->pReader = pBlob;
9391       if( rc!=SQLITE_OK ){
9392         sqlite3Fts5IndexCloseReader(p);
9393       }
9394       if( rc==SQLITE_ABORT ) rc = SQLITE_OK;
9395     }
9396 
9397     /* If the blob handle is not open at this point, open it and seek
9398     ** to the requested entry.  */
9399     if( p->pReader==0 && rc==SQLITE_OK ){
9400       Fts5Config *pConfig = p->pConfig;
9401       rc = sqlite3_blob_open(pConfig->db,
9402           pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader
9403       );
9404     }
9405 
9406     /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls
9407     ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead.
9408     ** All the reasons those functions might return SQLITE_ERROR - missing
9409     ** table, missing row, non-blob/text in block column - indicate
9410     ** backing store corruption.  */
9411     if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT;
9412 
9413     if( rc==SQLITE_OK ){
9414       u8 *aOut = 0;               /* Read blob data into this buffer */
9415       int nByte = sqlite3_blob_bytes(p->pReader);
9416       sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING;
9417       pRet = (Fts5Data*)sqlite3_malloc64(nAlloc);
9418       if( pRet ){
9419         pRet->nn = nByte;
9420         aOut = pRet->p = (u8*)&pRet[1];
9421       }else{
9422         rc = SQLITE_NOMEM;
9423       }
9424 
9425       if( rc==SQLITE_OK ){
9426         rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0);
9427       }
9428       if( rc!=SQLITE_OK ){
9429         sqlite3_free(pRet);
9430         pRet = 0;
9431       }else{
9432         /* TODO1: Fix this */
9433         pRet->p[nByte] = 0x00;
9434         pRet->p[nByte+1] = 0x00;
9435         pRet->szLeaf = fts5GetU16(&pRet->p[2]);
9436       }
9437     }
9438     p->rc = rc;
9439     p->nRead++;
9440   }
9441 
9442   assert( (pRet==0)==(p->rc!=SQLITE_OK) );
9443   return pRet;
9444 }
9445 
9446 
9447 /*
9448 ** Release a reference to data record returned by an earlier call to
9449 ** fts5DataRead().
9450 */
9451 static void fts5DataRelease(Fts5Data *pData){
9452   sqlite3_free(pData);
9453 }
9454 
9455 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){
9456   Fts5Data *pRet = fts5DataRead(p, iRowid);
9457   if( pRet ){
9458     if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){
9459       p->rc = FTS5_CORRUPT;
9460       fts5DataRelease(pRet);
9461       pRet = 0;
9462     }
9463   }
9464   return pRet;
9465 }
9466 
9467 static int fts5IndexPrepareStmt(
9468   Fts5Index *p,
9469   sqlite3_stmt **ppStmt,
9470   char *zSql
9471 ){
9472   if( p->rc==SQLITE_OK ){
9473     if( zSql ){
9474       p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1,
9475           SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB,
9476           ppStmt, 0);
9477     }else{
9478       p->rc = SQLITE_NOMEM;
9479     }
9480   }
9481   sqlite3_free(zSql);
9482   return p->rc;
9483 }
9484 
9485 
9486 /*
9487 ** INSERT OR REPLACE a record into the %_data table.
9488 */
9489 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
9490   if( p->rc!=SQLITE_OK ) return;
9491 
9492   if( p->pWriter==0 ){
9493     Fts5Config *pConfig = p->pConfig;
9494     fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
9495           "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
9496           pConfig->zDb, pConfig->zName
9497     ));
9498     if( p->rc ) return;
9499   }
9500 
9501   sqlite3_bind_int64(p->pWriter, 1, iRowid);
9502   sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC);
9503   sqlite3_step(p->pWriter);
9504   p->rc = sqlite3_reset(p->pWriter);
9505   sqlite3_bind_null(p->pWriter, 2);
9506 }
9507 
9508 /*
9509 ** Execute the following SQL:
9510 **
9511 **     DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast
9512 */
9513 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
9514   if( p->rc!=SQLITE_OK ) return;
9515 
9516   if( p->pDeleter==0 ){
9517     Fts5Config *pConfig = p->pConfig;
9518     char *zSql = sqlite3_mprintf(
9519         "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
9520           pConfig->zDb, pConfig->zName
9521     );
9522     if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return;
9523   }
9524 
9525   sqlite3_bind_int64(p->pDeleter, 1, iFirst);
9526   sqlite3_bind_int64(p->pDeleter, 2, iLast);
9527   sqlite3_step(p->pDeleter);
9528   p->rc = sqlite3_reset(p->pDeleter);
9529 }
9530 
9531 /*
9532 ** Remove all records associated with segment iSegid.
9533 */
9534 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
9535   i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0);
9536   i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1;
9537   fts5DataDelete(p, iFirst, iLast);
9538   if( p->pIdxDeleter==0 ){
9539     Fts5Config *pConfig = p->pConfig;
9540     fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
9541           "DELETE FROM '%q'.'%q_idx' WHERE segid=?",
9542           pConfig->zDb, pConfig->zName
9543     ));
9544   }
9545   if( p->rc==SQLITE_OK ){
9546     sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
9547     sqlite3_step(p->pIdxDeleter);
9548     p->rc = sqlite3_reset(p->pIdxDeleter);
9549   }
9550 }
9551 
9552 /*
9553 ** Release a reference to an Fts5Structure object returned by an earlier
9554 ** call to fts5StructureRead() or fts5StructureDecode().
9555 */
9556 static void fts5StructureRelease(Fts5Structure *pStruct){
9557   if( pStruct && 0>=(--pStruct->nRef) ){
9558     int i;
9559     assert( pStruct->nRef==0 );
9560     for(i=0; i<pStruct->nLevel; i++){
9561       sqlite3_free(pStruct->aLevel[i].aSeg);
9562     }
9563     sqlite3_free(pStruct);
9564   }
9565 }
9566 
9567 static void fts5StructureRef(Fts5Structure *pStruct){
9568   pStruct->nRef++;
9569 }
9570 
9571 static void *sqlite3Fts5StructureRef(Fts5Index *p){
9572   fts5StructureRef(p->pStruct);
9573   return (void*)p->pStruct;
9574 }
9575 static void sqlite3Fts5StructureRelease(void *p){
9576   if( p ){
9577     fts5StructureRelease((Fts5Structure*)p);
9578   }
9579 }
9580 static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){
9581   if( p->pStruct!=(Fts5Structure*)pStruct ){
9582     return SQLITE_ABORT;
9583   }
9584   return SQLITE_OK;
9585 }
9586 
9587 /*
9588 ** Ensure that structure object (*pp) is writable.
9589 **
9590 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If
9591 ** an error occurs, (*pRc) is set to an SQLite error code before returning.
9592 */
9593 static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){
9594   Fts5Structure *p = *pp;
9595   if( *pRc==SQLITE_OK && p->nRef>1 ){
9596     i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel);
9597     Fts5Structure *pNew;
9598     pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte);
9599     if( pNew ){
9600       int i;
9601       memcpy(pNew, p, nByte);
9602       for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0;
9603       for(i=0; i<p->nLevel; i++){
9604         Fts5StructureLevel *pLvl = &pNew->aLevel[i];
9605         nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg;
9606         pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte);
9607         if( pLvl->aSeg==0 ){
9608           for(i=0; i<p->nLevel; i++){
9609             sqlite3_free(pNew->aLevel[i].aSeg);
9610           }
9611           sqlite3_free(pNew);
9612           return;
9613         }
9614         memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte);
9615       }
9616       p->nRef--;
9617       pNew->nRef = 1;
9618     }
9619     *pp = pNew;
9620   }
9621 }
9622 
9623 /*
9624 ** Deserialize and return the structure record currently stored in serialized
9625 ** form within buffer pData/nData.
9626 **
9627 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
9628 ** are over-allocated by one slot. This allows the structure contents
9629 ** to be more easily edited.
9630 **
9631 ** If an error occurs, *ppOut is set to NULL and an SQLite error code
9632 ** returned. Otherwise, *ppOut is set to point to the new object and
9633 ** SQLITE_OK returned.
9634 */
9635 static int fts5StructureDecode(
9636   const u8 *pData,                /* Buffer containing serialized structure */
9637   int nData,                      /* Size of buffer pData in bytes */
9638   int *piCookie,                  /* Configuration cookie value */
9639   Fts5Structure **ppOut           /* OUT: Deserialized object */
9640 ){
9641   int rc = SQLITE_OK;
9642   int i = 0;
9643   int iLvl;
9644   int nLevel = 0;
9645   int nSegment = 0;
9646   sqlite3_int64 nByte;            /* Bytes of space to allocate at pRet */
9647   Fts5Structure *pRet = 0;        /* Structure object to return */
9648 
9649   /* Grab the cookie value */
9650   if( piCookie ) *piCookie = sqlite3Fts5Get32(pData);
9651   i = 4;
9652 
9653   /* Read the total number of levels and segments from the start of the
9654   ** structure record.  */
9655   i += fts5GetVarint32(&pData[i], nLevel);
9656   i += fts5GetVarint32(&pData[i], nSegment);
9657   if( nLevel>FTS5_MAX_SEGMENT   || nLevel<0
9658    || nSegment>FTS5_MAX_SEGMENT || nSegment<0
9659   ){
9660     return FTS5_CORRUPT;
9661   }
9662   nByte = (
9663       sizeof(Fts5Structure) +                    /* Main structure */
9664       sizeof(Fts5StructureLevel) * (nLevel-1)    /* aLevel[] array */
9665   );
9666   pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte);
9667 
9668   if( pRet ){
9669     pRet->nRef = 1;
9670     pRet->nLevel = nLevel;
9671     pRet->nSegment = nSegment;
9672     i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter);
9673 
9674     for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){
9675       Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl];
9676       int nTotal = 0;
9677       int iSeg;
9678 
9679       if( i>=nData ){
9680         rc = FTS5_CORRUPT;
9681       }else{
9682         i += fts5GetVarint32(&pData[i], pLvl->nMerge);
9683         i += fts5GetVarint32(&pData[i], nTotal);
9684         if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT;
9685         pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc,
9686             nTotal * sizeof(Fts5StructureSegment)
9687         );
9688         nSegment -= nTotal;
9689       }
9690 
9691       if( rc==SQLITE_OK ){
9692         pLvl->nSeg = nTotal;
9693         for(iSeg=0; iSeg<nTotal; iSeg++){
9694           Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
9695           if( i>=nData ){
9696             rc = FTS5_CORRUPT;
9697             break;
9698           }
9699           i += fts5GetVarint32(&pData[i], pSeg->iSegid);
9700           i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst);
9701           i += fts5GetVarint32(&pData[i], pSeg->pgnoLast);
9702           if( pSeg->pgnoLast<pSeg->pgnoFirst ){
9703             rc = FTS5_CORRUPT;
9704             break;
9705           }
9706         }
9707         if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT;
9708         if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT;
9709       }
9710     }
9711     if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT;
9712 
9713     if( rc!=SQLITE_OK ){
9714       fts5StructureRelease(pRet);
9715       pRet = 0;
9716     }
9717   }
9718 
9719   *ppOut = pRet;
9720   return rc;
9721 }
9722 
9723 /*
9724 ** Add a level to the Fts5Structure.aLevel[] array of structure object
9725 ** (*ppStruct).
9726 */
9727 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){
9728   fts5StructureMakeWritable(pRc, ppStruct);
9729   if( *pRc==SQLITE_OK ){
9730     Fts5Structure *pStruct = *ppStruct;
9731     int nLevel = pStruct->nLevel;
9732     sqlite3_int64 nByte = (
9733         sizeof(Fts5Structure) +                  /* Main structure */
9734         sizeof(Fts5StructureLevel) * (nLevel+1)  /* aLevel[] array */
9735     );
9736 
9737     pStruct = sqlite3_realloc64(pStruct, nByte);
9738     if( pStruct ){
9739       memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel));
9740       pStruct->nLevel++;
9741       *ppStruct = pStruct;
9742     }else{
9743       *pRc = SQLITE_NOMEM;
9744     }
9745   }
9746 }
9747 
9748 /*
9749 ** Extend level iLvl so that there is room for at least nExtra more
9750 ** segments.
9751 */
9752 static void fts5StructureExtendLevel(
9753   int *pRc,
9754   Fts5Structure *pStruct,
9755   int iLvl,
9756   int nExtra,
9757   int bInsert
9758 ){
9759   if( *pRc==SQLITE_OK ){
9760     Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
9761     Fts5StructureSegment *aNew;
9762     sqlite3_int64 nByte;
9763 
9764     nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment);
9765     aNew = sqlite3_realloc64(pLvl->aSeg, nByte);
9766     if( aNew ){
9767       if( bInsert==0 ){
9768         memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra);
9769       }else{
9770         int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment);
9771         memmove(&aNew[nExtra], aNew, nMove);
9772         memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra);
9773       }
9774       pLvl->aSeg = aNew;
9775     }else{
9776       *pRc = SQLITE_NOMEM;
9777     }
9778   }
9779 }
9780 
9781 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){
9782   Fts5Structure *pRet = 0;
9783   Fts5Config *pConfig = p->pConfig;
9784   int iCookie;                    /* Configuration cookie */
9785   Fts5Data *pData;
9786 
9787   pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID);
9788   if( p->rc==SQLITE_OK ){
9789     /* TODO: Do we need this if the leaf-index is appended? Probably... */
9790     memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING);
9791     p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet);
9792     if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){
9793       p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie);
9794     }
9795     fts5DataRelease(pData);
9796     if( p->rc!=SQLITE_OK ){
9797       fts5StructureRelease(pRet);
9798       pRet = 0;
9799     }
9800   }
9801 
9802   return pRet;
9803 }
9804 
9805 static i64 fts5IndexDataVersion(Fts5Index *p){
9806   i64 iVersion = 0;
9807 
9808   if( p->rc==SQLITE_OK ){
9809     if( p->pDataVersion==0 ){
9810       p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion,
9811           sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb)
9812           );
9813       if( p->rc ) return 0;
9814     }
9815 
9816     if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){
9817       iVersion = sqlite3_column_int64(p->pDataVersion, 0);
9818     }
9819     p->rc = sqlite3_reset(p->pDataVersion);
9820   }
9821 
9822   return iVersion;
9823 }
9824 
/*
** Read, deserialize and return the structure record.
**
** The decoded object is cached in p->pStruct. If no object is cached when
** this function is called, the current data-version is stored in
** p->iStructVersion and the record is read and decoded. A new reference
** to the cached object is then returned; the caller should release it
** using fts5StructureRelease().
**
** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array
** are over-allocated as described for function fts5StructureDecode()
** above.
**
** If an error occurs, NULL is returned and an error code left in the
** Fts5Index handle. If an error has already occurred when this function
** is called, NULL is returned and no structure is read.
*/
static Fts5Structure *fts5StructureRead(Fts5Index *p){

  /* Nothing cached: record the data-version, then load the record. */
  if( p->pStruct==0 ){
    p->iStructVersion = fts5IndexDataVersion(p);
    if( p->rc==SQLITE_OK ){
      p->pStruct = fts5StructureReadUncached(p);
    }
  }

  /* Disabled debugging aid: when enabled, this is the "else" branch of the
  ** block above. It re-reads the record from disk and checks that it
  ** matches the cached object. */
#if 0
  else{
    Fts5Structure *pTest = fts5StructureReadUncached(p);
    if( pTest ){
      int i, j;
      assert_nc( p->pStruct->nSegment==pTest->nSegment );
      assert_nc( p->pStruct->nLevel==pTest->nLevel );
      for(i=0; i<pTest->nLevel; i++){
        assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge );
        assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg );
        for(j=0; j<pTest->aLevel[i].nSeg; j++){
          Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j];
          Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j];
          assert_nc( p1->iSegid==p2->iSegid );
          assert_nc( p1->pgnoFirst==p2->pgnoFirst );
          assert_nc( p1->pgnoLast==p2->pgnoLast );
        }
      }
      fts5StructureRelease(pTest);
    }
  }
#endif

  if( p->rc!=SQLITE_OK ) return 0;
  assert( p->iStructVersion!=0 );
  assert( p->pStruct!=0 );

  /* The cache keeps its own reference; hand a second one to the caller. */
  fts5StructureRef(p->pStruct);
  return p->pStruct;
}
9874 
9875 static void fts5StructureInvalidate(Fts5Index *p){
9876   if( p->pStruct ){
9877     fts5StructureRelease(p->pStruct);
9878     p->pStruct = 0;
9879   }
9880 }
9881 
/*
** Return the sum of Fts5StructureLevel.nSeg over all levels of index
** structure pStruct, or zero if pStruct is NULL. This function is only
** ever used as part of assert() conditions.
*/
#ifdef SQLITE_DEBUG
static int fts5StructureCountSegments(Fts5Structure *pStruct){
  int nTotal = 0;                 /* Running total of segments */
  int ii;                         /* Level index */

  if( pStruct==0 ) return 0;
  for(ii=0; ii<pStruct->nLevel; ii++){
    nTotal += pStruct->aLevel[ii].nSeg;
  }
  return nTotal;
}
#endif
9899 
/*
** Append nBlob bytes from pBlob to buffer pBuf without growing it. The
** caller must already have ensured that the buffer is large enough; this
** is checked by assert() only.
**
** Every use of a macro argument is parenthesized so that an argument such
** as (b ? x : y) is evaluated as a unit. Note that pBuf and nBlob are still
** evaluated more than once, so arguments must not have side effects.
*/
#define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) {     \
  assert( (pBuf)->nSpace>=((pBuf)->n+(nBlob)) );           \
  memcpy(&(pBuf)->p[(pBuf)->n], (pBlob), (nBlob));         \
  (pBuf)->n += (nBlob);                                    \
}

/*
** Append iVal to buffer pBuf as a varint without growing the buffer. As
** above, sufficient space must already be available; this is verified by
** assert() after the write.
*/
#define fts5BufferSafeAppendVarint(pBuf, iVal) {                \
  (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal));  \
  assert( (pBuf)->nSpace>=(pBuf)->n );                          \
}
9910 
9911 
9912 /*
9913 ** Serialize and store the "structure" record.
9914 **
9915 ** If an error occurs, leave an error code in the Fts5Index object. If an
9916 ** error has already occurred, this function is a no-op.
9917 */
9918 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){
9919   if( p->rc==SQLITE_OK ){
9920     Fts5Buffer buf;               /* Buffer to serialize record into */
9921     int iLvl;                     /* Used to iterate through levels */
9922     int iCookie;                  /* Cookie value to store */
9923 
9924     assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
9925     memset(&buf, 0, sizeof(Fts5Buffer));
9926 
9927     /* Append the current configuration cookie */
9928     iCookie = p->pConfig->iCookie;
9929     if( iCookie<0 ) iCookie = 0;
9930 
9931     if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){
9932       sqlite3Fts5Put32(buf.p, iCookie);
9933       buf.n = 4;
9934       fts5BufferSafeAppendVarint(&buf, pStruct->nLevel);
9935       fts5BufferSafeAppendVarint(&buf, pStruct->nSegment);
9936       fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter);
9937     }
9938 
9939     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
9940       int iSeg;                     /* Used to iterate through segments */
9941       Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
9942       fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge);
9943       fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg);
9944       assert( pLvl->nMerge<=pLvl->nSeg );
9945 
9946       for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
9947         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid);
9948         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst);
9949         fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast);
9950       }
9951     }
9952 
9953     fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n);
9954     fts5BufferFree(&buf);
9955   }
9956 }
9957 
/*
** Debugging aid, normally compiled out. When the "#if 0" below is changed
** to "#if 1", fts5PrintStructure() formats pStruct using
** fts5DebugStructure() and prints it to stdout prefixed by zCaption.
** Otherwise fts5PrintStructure() is a macro that expands to nothing, so
** calls to it may be left in the code.
*/
#if 0
static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*);
static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){
  int rc = SQLITE_OK;
  Fts5Buffer buf;
  memset(&buf, 0, sizeof(buf));
  fts5DebugStructure(&rc, &buf, pStruct);
  fprintf(stdout, "%s: %s\n", zCaption, buf.p);
  fflush(stdout);
  fts5BufferFree(&buf);
}
#else
# define fts5PrintStructure(x,y)
#endif
9972 
9973 static int fts5SegmentSize(Fts5StructureSegment *pSeg){
9974   return 1 + pSeg->pgnoLast - pSeg->pgnoFirst;
9975 }
9976 
9977 /*
9978 ** Return a copy of index structure pStruct. Except, promote as many
9979 ** segments as possible to level iPromote. If an OOM occurs, NULL is
9980 ** returned.
9981 */
9982 static void fts5StructurePromoteTo(
9983   Fts5Index *p,
9984   int iPromote,
9985   int szPromote,
9986   Fts5Structure *pStruct
9987 ){
9988   int il, is;
9989   Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote];
9990 
9991   if( pOut->nMerge==0 ){
9992     for(il=iPromote+1; il<pStruct->nLevel; il++){
9993       Fts5StructureLevel *pLvl = &pStruct->aLevel[il];
9994       if( pLvl->nMerge ) return;
9995       for(is=pLvl->nSeg-1; is>=0; is--){
9996         int sz = fts5SegmentSize(&pLvl->aSeg[is]);
9997         if( sz>szPromote ) return;
9998         fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1);
9999         if( p->rc ) return;
10000         memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment));
10001         pOut->nSeg++;
10002         pLvl->nSeg--;
10003       }
10004     }
10005   }
10006 }
10007 
10008 /*
10009 ** A new segment has just been written to level iLvl of index structure
10010 ** pStruct. This function determines if any segments should be promoted
10011 ** as a result. Segments are promoted in two scenarios:
10012 **
10013 **   a) If the segment just written is smaller than one or more segments
10014 **      within the previous populated level, it is promoted to the previous
10015 **      populated level.
10016 **
10017 **   b) If the segment just written is larger than the newest segment on
10018 **      the next populated level, then that segment, and any other adjacent
10019 **      segments that are also smaller than the one just written, are
10020 **      promoted.
10021 **
10022 ** If one or more segments are promoted, the structure object is updated
10023 ** to reflect this.
10024 */
10025 static void fts5StructurePromote(
10026   Fts5Index *p,                   /* FTS5 backend object */
10027   int iLvl,                       /* Index level just updated */
10028   Fts5Structure *pStruct          /* Index structure */
10029 ){
10030   if( p->rc==SQLITE_OK ){
10031     int iTst;
10032     int iPromote = -1;
10033     int szPromote = 0;            /* Promote anything this size or smaller */
10034     Fts5StructureSegment *pSeg;   /* Segment just written */
10035     int szSeg;                    /* Size of segment just written */
10036     int nSeg = pStruct->aLevel[iLvl].nSeg;
10037 
10038     if( nSeg==0 ) return;
10039     pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1];
10040     szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst);
10041 
10042     /* Check for condition (a) */
10043     for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--);
10044     if( iTst>=0 ){
10045       int i;
10046       int szMax = 0;
10047       Fts5StructureLevel *pTst = &pStruct->aLevel[iTst];
10048       assert( pTst->nMerge==0 );
10049       for(i=0; i<pTst->nSeg; i++){
10050         int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1;
10051         if( sz>szMax ) szMax = sz;
10052       }
10053       if( szMax>=szSeg ){
10054         /* Condition (a) is true. Promote the newest segment on level
10055         ** iLvl to level iTst.  */
10056         iPromote = iTst;
10057         szPromote = szMax;
10058       }
10059     }
10060 
10061     /* If condition (a) is not met, assume (b) is true. StructurePromoteTo()
10062     ** is a no-op if it is not.  */
10063     if( iPromote<0 ){
10064       iPromote = iLvl;
10065       szPromote = szSeg;
10066     }
10067     fts5StructurePromoteTo(p, iPromote, szPromote, pStruct);
10068   }
10069 }
10070 
10071 
/*
** Advance the iterator passed as the only argument. If the end of the
** doclist-index page is reached, return non-zero.
**
** On the first call for a page (iOff==0), the byte at offset 0 is skipped,
** then a varint leaf page number and a varint rowid are read and the
** offset following them saved in iFirstOff.
**
** On subsequent calls, any 0x00 bytes at the current offset are skipped.
** The leaf page number is increased by one plus the number of bytes
** skipped. The varint that follows is added to the current rowid. If no
** non-zero byte remains before the end of the page, the bEof flag is set
** and the other fields are left unchanged.
*/
static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){
  Fts5Data *pData = pLvl->pData;

  if( pLvl->iOff==0 ){
    /* First entry on the page: absolute page number and rowid */
    assert( pLvl->bEof==0 );
    pLvl->iOff = 1;
    pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno);
    pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid);
    pLvl->iFirstOff = pLvl->iOff;
  }else{
    int iOff;

    /* Skip over any run of 0x00 bytes */
    for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){
      if( pData->p[iOff] ) break;
    }

    if( iOff<pData->nn ){
      i64 iVal;
      /* One page for this entry, plus one for each 0x00 byte skipped */
      pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1;
      iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal);
      pLvl->iRowid += iVal;
      pLvl->iOff = iOff;
    }else{
      pLvl->bEof = 1;
    }
  }

  return pLvl->bEof;
}
10104 
10105 /*
10106 ** Advance the iterator passed as the only argument.
10107 */
10108 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
10109   Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
10110 
10111   assert( iLvl<pIter->nLvl );
10112   if( fts5DlidxLvlNext(pLvl) ){
10113     if( (iLvl+1) < pIter->nLvl ){
10114       fts5DlidxIterNextR(p, pIter, iLvl+1);
10115       if( pLvl[1].bEof==0 ){
10116         fts5DataRelease(pLvl->pData);
10117         memset(pLvl, 0, sizeof(Fts5DlidxLvl));
10118         pLvl->pData = fts5DataRead(p,
10119             FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
10120         );
10121         if( pLvl->pData ) fts5DlidxLvlNext(pLvl);
10122       }
10123     }
10124   }
10125 
10126   return pIter->aLvl[0].bEof;
10127 }
10128 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){
10129   return fts5DlidxIterNextR(p, pIter, 0);
10130 }
10131 
10132 /*
10133 ** The iterator passed as the first argument has the following fields set
10134 ** as follows. This function sets up the rest of the iterator so that it
10135 ** points to the first rowid in the doclist-index.
10136 **
10137 **   pData:
10138 **     pointer to doclist-index record,
10139 **
10140 ** When this function is called pIter->iLeafPgno is the page number the
10141 ** doclist is associated with (the one featuring the term).
10142 */
10143 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){
10144   int i;
10145   for(i=0; i<pIter->nLvl; i++){
10146     fts5DlidxLvlNext(&pIter->aLvl[i]);
10147   }
10148   return pIter->aLvl[0].bEof;
10149 }
10150 
10151 
10152 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){
10153   return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof;
10154 }
10155 
10156 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){
10157   int i;
10158 
10159   /* Advance each level to the last entry on the last page */
10160   for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){
10161     Fts5DlidxLvl *pLvl = &pIter->aLvl[i];
10162     while( fts5DlidxLvlNext(pLvl)==0 );
10163     pLvl->bEof = 0;
10164 
10165     if( i>0 ){
10166       Fts5DlidxLvl *pChild = &pLvl[-1];
10167       fts5DataRelease(pChild->pData);
10168       memset(pChild, 0, sizeof(Fts5DlidxLvl));
10169       pChild->pData = fts5DataRead(p,
10170           FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno)
10171       );
10172     }
10173   }
10174 }
10175 
/*
** Move the iterator passed as the only argument to the previous entry.
**
** If the iterator already points to the first entry on the page (its
** offset is at or before iFirstOff), the bEof flag is set. Otherwise the
** rowid delta stored immediately before the current offset is subtracted
** from the current rowid, and the leaf page number is reduced by one plus
** the number of 0x00 entries found before that delta.
**
** Returns the bEof flag.
*/
static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){
  int iOff = pLvl->iOff;

  assert( pLvl->bEof==0 );
  if( iOff<=pLvl->iFirstOff ){
    pLvl->bEof = 1;
  }else{
    u8 *a = pLvl->pData->p;
    i64 iVal;
    int iLimit;
    int ii;
    int nZero = 0;

    /* Currently iOff points to the first byte of a varint. This block
    ** decrements iOff until it points to the first byte of the previous
    ** varint. Taking care not to read any memory locations that occur
    ** before the buffer in memory.  */
    iLimit = (iOff>9 ? iOff-9 : 0);
    for(iOff--; iOff>iLimit; iOff--){
      if( (a[iOff-1] & 0x80)==0 ) break;
    }

    /* Undo the rowid delta and the page step applied by LvlNext() */
    fts5GetVarint(&a[iOff], (u64*)&iVal);
    pLvl->iRowid -= iVal;
    pLvl->iLeafPgno--;

    /* Skip backwards past any 0x00 varints. */
    for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){
      nZero++;
    }
    if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){
      /* The byte immediately before the last 0x00 byte has the 0x80 bit
      ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80
      ** bytes before a[ii]. */
      int bZero = 0;              /* True if last 0x00 counts */
      if( (ii-8)>=pLvl->iFirstOff ){
        int j;
        for(j=1; j<=8 && (a[ii-j] & 0x80); j++);
        bZero = (j>8);
      }
      if( bZero==0 ) nZero--;
    }

    /* Each 0x00 entry accounts for one more page stepped over */
    pLvl->iLeafPgno -= nZero;
    pLvl->iOff = iOff - nZero;
  }

  return pLvl->bEof;
}
10227 
10228 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){
10229   Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl];
10230 
10231   assert( iLvl<pIter->nLvl );
10232   if( fts5DlidxLvlPrev(pLvl) ){
10233     if( (iLvl+1) < pIter->nLvl ){
10234       fts5DlidxIterPrevR(p, pIter, iLvl+1);
10235       if( pLvl[1].bEof==0 ){
10236         fts5DataRelease(pLvl->pData);
10237         memset(pLvl, 0, sizeof(Fts5DlidxLvl));
10238         pLvl->pData = fts5DataRead(p,
10239             FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno)
10240         );
10241         if( pLvl->pData ){
10242           while( fts5DlidxLvlNext(pLvl)==0 );
10243           pLvl->bEof = 0;
10244         }
10245       }
10246     }
10247   }
10248 
10249   return pIter->aLvl[0].bEof;
10250 }
10251 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){
10252   return fts5DlidxIterPrevR(p, pIter, 0);
10253 }
10254 
10255 /*
10256 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit().
10257 */
10258 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){
10259   if( pIter ){
10260     int i;
10261     for(i=0; i<pIter->nLvl; i++){
10262       fts5DataRelease(pIter->aLvl[i].pData);
10263     }
10264     sqlite3_free(pIter);
10265   }
10266 }
10267 
10268 static Fts5DlidxIter *fts5DlidxIterInit(
10269   Fts5Index *p,                   /* Fts5 Backend to iterate within */
10270   int bRev,                       /* True for ORDER BY ASC */
10271   int iSegid,                     /* Segment id */
10272   int iLeafPg                     /* Leaf page number to load dlidx for */
10273 ){
10274   Fts5DlidxIter *pIter = 0;
10275   int i;
10276   int bDone = 0;
10277 
10278   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
10279     sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl);
10280     Fts5DlidxIter *pNew;
10281 
10282     pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte);
10283     if( pNew==0 ){
10284       p->rc = SQLITE_NOMEM;
10285     }else{
10286       i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg);
10287       Fts5DlidxLvl *pLvl = &pNew->aLvl[i];
10288       pIter = pNew;
10289       memset(pLvl, 0, sizeof(Fts5DlidxLvl));
10290       pLvl->pData = fts5DataRead(p, iRowid);
10291       if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){
10292         bDone = 1;
10293       }
10294       pIter->nLvl = i+1;
10295     }
10296   }
10297 
10298   if( p->rc==SQLITE_OK ){
10299     pIter->iSegid = iSegid;
10300     if( bRev==0 ){
10301       fts5DlidxIterFirst(pIter);
10302     }else{
10303       fts5DlidxIterLast(p, pIter);
10304     }
10305   }
10306 
10307   if( p->rc!=SQLITE_OK ){
10308     fts5DlidxIterFree(pIter);
10309     pIter = 0;
10310   }
10311 
10312   return pIter;
10313 }
10314 
10315 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){
10316   return pIter->aLvl[0].iRowid;
10317 }
10318 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){
10319   return pIter->aLvl[0].iLeafPgno;
10320 }
10321 
10322 /*
10323 ** Load the next leaf page into the segment iterator.
10324 */
10325 static void fts5SegIterNextPage(
10326   Fts5Index *p,                   /* FTS5 backend object */
10327   Fts5SegIter *pIter              /* Iterator to advance to next page */
10328 ){
10329   Fts5Data *pLeaf;
10330   Fts5StructureSegment *pSeg = pIter->pSeg;
10331   fts5DataRelease(pIter->pLeaf);
10332   pIter->iLeafPgno++;
10333   if( pIter->pNextLeaf ){
10334     pIter->pLeaf = pIter->pNextLeaf;
10335     pIter->pNextLeaf = 0;
10336   }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){
10337     pIter->pLeaf = fts5LeafRead(p,
10338         FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno)
10339     );
10340   }else{
10341     pIter->pLeaf = 0;
10342   }
10343   pLeaf = pIter->pLeaf;
10344 
10345   if( pLeaf ){
10346     pIter->iPgidxOff = pLeaf->szLeaf;
10347     if( fts5LeafIsTermless(pLeaf) ){
10348       pIter->iEndofDoclist = pLeaf->nn+1;
10349     }else{
10350       pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff],
10351           pIter->iEndofDoclist
10352       );
10353     }
10354   }
10355 }
10356 
10357 /*
10358 ** Argument p points to a buffer containing a varint to be interpreted as a
10359 ** position list size field. Read the varint and return the number of bytes
10360 ** read. Before returning, set *pnSz to the number of bytes in the position
10361 ** list, and *pbDel to true if the delete flag is set, or false otherwise.
10362 */
10363 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){
10364   int nSz;
10365   int n = 0;
10366   fts5FastGetVarint32(p, n, nSz);
10367   assert_nc( nSz>=0 );
10368   *pnSz = nSz/2;
10369   *pbDel = nSz & 0x0001;
10370   return n;
10371 }
10372 
10373 /*
10374 ** Fts5SegIter.iLeafOffset currently points to the first byte of a
10375 ** position-list size field. Read the value of the field and store it
10376 ** in the following variables:
10377 **
10378 **   Fts5SegIter.nPos
10379 **   Fts5SegIter.bDel
10380 **
10381 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the
10382 ** position list content (if any).
10383 */
10384 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){
10385   if( p->rc==SQLITE_OK ){
10386     int iOff = pIter->iLeafOffset;  /* Offset to read at */
10387     ASSERT_SZLEAF_OK(pIter->pLeaf);
10388     if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
10389       int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf);
10390       pIter->bDel = 0;
10391       pIter->nPos = 1;
10392       if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
10393         pIter->bDel = 1;
10394         iOff++;
10395         if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){
10396           pIter->nPos = 1;
10397           iOff++;
10398         }else{
10399           pIter->nPos = 0;
10400         }
10401       }
10402     }else{
10403       int nSz;
10404       fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz);
10405       pIter->bDel = (nSz & 0x0001);
10406       pIter->nPos = nSz>>1;
10407       assert_nc( pIter->nPos>=0 );
10408     }
10409     pIter->iLeafOffset = iOff;
10410   }
10411 }
10412 
10413 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){
10414   u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
10415   i64 iOff = pIter->iLeafOffset;
10416 
10417   ASSERT_SZLEAF_OK(pIter->pLeaf);
10418   if( iOff>=pIter->pLeaf->szLeaf ){
10419     fts5SegIterNextPage(p, pIter);
10420     if( pIter->pLeaf==0 ){
10421       if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
10422       return;
10423     }
10424     iOff = 4;
10425     a = pIter->pLeaf->p;
10426   }
10427   iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
10428   pIter->iLeafOffset = iOff;
10429 }
10430 
/*
** Fts5SegIter.iLeafOffset currently points to the first byte of the
** "nSuffix" field of a term. Function parameter nKeep contains the value
** of the "nPrefix" field (if there was one - it is passed 0 if this is
** the first term in the segment).
**
** This function populates:
**
**   Fts5SegIter.term
**   Fts5SegIter.iRowid
**
** accordingly and leaves (Fts5SegIter.iLeafOffset) pointing to the size
** field of the first position list - the one belonging to document
** (Fts5SegIter.iRowid). Fts5SegIter.iTermLeafOffset, iTermLeafPgno and
** iEndofDoclist are updated as well.
**
** If the term suffix is empty or extends past the end of the leaf, or if
** nKeep is larger than the size of the current term, p->rc is set to
** FTS5_CORRUPT and the iterator is left unmodified.
*/
static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){
  u8 *a = pIter->pLeaf->p;        /* Buffer to read data from */
  i64 iOff = pIter->iLeafOffset;  /* Offset to read at */
  int nNew;                       /* Bytes of new data */

  iOff += fts5GetVarint32(&a[iOff], nNew);
  if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){
    p->rc = FTS5_CORRUPT;
    return;
  }

  /* New term: first nKeep bytes of the old term plus the nNew-byte suffix */
  pIter->term.n = nKeep;
  fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
  assert( pIter->term.n<=pIter->term.nSpace );
  iOff += nNew;
  pIter->iTermLeafOffset = iOff;
  pIter->iTermLeafPgno = pIter->iLeafPgno;
  pIter->iLeafOffset = iOff;

  /* If the page-index has no further entries, set iEndofDoclist to one
  ** past the end of the page. Otherwise advance it by the next page-index
  ** varint. */
  if( pIter->iPgidxOff>=pIter->pLeaf->nn ){
    pIter->iEndofDoclist = pIter->pLeaf->nn+1;
  }else{
    int nExtra;
    pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra);
    pIter->iEndofDoclist += nExtra;
  }

  fts5SegIterLoadRowid(p, pIter);
}
10474 
/*
** Forward declarations for the three functions that may be installed as
** Fts5SegIter.xNext by fts5SegIterSetNext(). All three share the same
** signature.
*/
static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*);
static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*);
10478 
10479 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){
10480   if( pIter->flags & FTS5_SEGITER_REVERSE ){
10481     pIter->xNext = fts5SegIterNext_Reverse;
10482   }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
10483     pIter->xNext = fts5SegIterNext_None;
10484   }else{
10485     pIter->xNext = fts5SegIterNext;
10486   }
10487 }
10488 
10489 /*
10490 ** Initialize the iterator object pIter to iterate through the entries in
10491 ** segment pSeg. The iterator is left pointing to the first entry when
10492 ** this function returns.
10493 **
10494 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
10495 ** an error has already occurred when this function is called, it is a no-op.
10496 */
10497 static void fts5SegIterInit(
10498   Fts5Index *p,                   /* FTS index object */
10499   Fts5StructureSegment *pSeg,     /* Description of segment */
10500   Fts5SegIter *pIter              /* Object to populate */
10501 ){
10502   if( pSeg->pgnoFirst==0 ){
10503     /* This happens if the segment is being used as an input to an incremental
10504     ** merge and all data has already been "trimmed". See function
10505     ** fts5TrimSegments() for details. In this case leave the iterator empty.
10506     ** The caller will see the (pIter->pLeaf==0) and assume the iterator is
10507     ** at EOF already. */
10508     assert( pIter->pLeaf==0 );
10509     return;
10510   }
10511 
10512   if( p->rc==SQLITE_OK ){
10513     memset(pIter, 0, sizeof(*pIter));
10514     fts5SegIterSetNext(p, pIter);
10515     pIter->pSeg = pSeg;
10516     pIter->iLeafPgno = pSeg->pgnoFirst-1;
10517     fts5SegIterNextPage(p, pIter);
10518   }
10519 
10520   if( p->rc==SQLITE_OK ){
10521     pIter->iLeafOffset = 4;
10522     assert( pIter->pLeaf!=0 );
10523     assert_nc( pIter->pLeaf->nn>4 );
10524     assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 );
10525     pIter->iPgidxOff = pIter->pLeaf->szLeaf+1;
10526     fts5SegIterLoadTerm(p, pIter, 0);
10527     fts5SegIterLoadNPos(p, pIter);
10528   }
10529 }
10530 
10531 /*
10532 ** This function is only ever called on iterators created by calls to
10533 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set.
10534 **
10535 ** The iterator is in an unusual state when this function is called: the
10536 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of
10537 ** the position-list size field for the first relevant rowid on the page.
10538 ** Fts5SegIter.rowid is set, but nPos and bDel are not.
10539 **
10540 ** This function advances the iterator so that it points to the last
10541 ** relevant rowid on the page and, if necessary, initializes the
10542 ** aRowidOffset[] and iRowidOffset variables. At this point the iterator
10543 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first
10544 ** byte of the position list content associated with said rowid.
10545 */
10546 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){
10547   int eDetail = p->pConfig->eDetail;
10548   int n = pIter->pLeaf->szLeaf;
10549   int i = pIter->iLeafOffset;
10550   u8 *a = pIter->pLeaf->p;
10551   int iRowidOffset = 0;
10552 
10553   if( n>pIter->iEndofDoclist ){
10554     n = pIter->iEndofDoclist;
10555   }
10556 
10557   ASSERT_SZLEAF_OK(pIter->pLeaf);
10558   while( 1 ){
10559     u64 iDelta = 0;
10560 
10561     if( eDetail==FTS5_DETAIL_NONE ){
10562       /* todo */
10563       if( i<n && a[i]==0 ){
10564         i++;
10565         if( i<n && a[i]==0 ) i++;
10566       }
10567     }else{
10568       int nPos;
10569       int bDummy;
10570       i += fts5GetPoslistSize(&a[i], &nPos, &bDummy);
10571       i += nPos;
10572     }
10573     if( i>=n ) break;
10574     i += fts5GetVarint(&a[i], &iDelta);
10575     pIter->iRowid += iDelta;
10576 
10577     /* If necessary, grow the pIter->aRowidOffset[] array. */
10578     if( iRowidOffset>=pIter->nRowidOffset ){
10579       int nNew = pIter->nRowidOffset + 8;
10580       int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int));
10581       if( aNew==0 ){
10582         p->rc = SQLITE_NOMEM;
10583         break;
10584       }
10585       pIter->aRowidOffset = aNew;
10586       pIter->nRowidOffset = nNew;
10587     }
10588 
10589     pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset;
10590     pIter->iLeafOffset = i;
10591   }
10592   pIter->iRowidOffset = iRowidOffset;
10593   fts5SegIterLoadNPos(p, pIter);
10594 }
10595 
10596 /*
10597 **
10598 */
10599 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){
10600   assert( pIter->flags & FTS5_SEGITER_REVERSE );
10601   assert( pIter->flags & FTS5_SEGITER_ONETERM );
10602 
10603   fts5DataRelease(pIter->pLeaf);
10604   pIter->pLeaf = 0;
10605   while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){
10606     Fts5Data *pNew;
10607     pIter->iLeafPgno--;
10608     pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID(
10609           pIter->pSeg->iSegid, pIter->iLeafPgno
10610     ));
10611     if( pNew ){
10612       /* iTermLeafOffset may be equal to szLeaf if the term is the last
10613       ** thing on the page - i.e. the first rowid is on the following page.
10614       ** In this case leave pIter->pLeaf==0, this iterator is at EOF. */
10615       if( pIter->iLeafPgno==pIter->iTermLeafPgno ){
10616         assert( pIter->pLeaf==0 );
10617         if( pIter->iTermLeafOffset<pNew->szLeaf ){
10618           pIter->pLeaf = pNew;
10619           pIter->iLeafOffset = pIter->iTermLeafOffset;
10620         }
10621       }else{
10622         int iRowidOff;
10623         iRowidOff = fts5LeafFirstRowidOff(pNew);
10624         if( iRowidOff ){
10625           if( iRowidOff>=pNew->szLeaf ){
10626             p->rc = FTS5_CORRUPT;
10627           }else{
10628             pIter->pLeaf = pNew;
10629             pIter->iLeafOffset = iRowidOff;
10630           }
10631         }
10632       }
10633 
10634       if( pIter->pLeaf ){
10635         u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset];
10636         pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid);
10637         break;
10638       }else{
10639         fts5DataRelease(pNew);
10640       }
10641     }
10642   }
10643 
10644   if( pIter->pLeaf ){
10645     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
10646     fts5SegIterReverseInitPage(p, pIter);
10647   }
10648 }
10649 
10650 /*
10651 ** Return true if the iterator passed as the second argument currently
10652 ** points to a delete marker. A delete marker is an entry with a 0 byte
10653 ** position-list.
10654 */
10655 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){
10656   Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
10657   return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0);
10658 }
10659 
10660 /*
10661 ** Advance iterator pIter to the next entry.
10662 **
10663 ** This version of fts5SegIterNext() is only used by reverse iterators.
10664 */
10665 static void fts5SegIterNext_Reverse(
10666   Fts5Index *p,                   /* FTS5 backend object */
10667   Fts5SegIter *pIter,             /* Iterator to advance */
10668   int *pbUnused                   /* Unused */
10669 ){
10670   assert( pIter->flags & FTS5_SEGITER_REVERSE );
10671   assert( pIter->pNextLeaf==0 );
10672   UNUSED_PARAM(pbUnused);
10673 
10674   if( pIter->iRowidOffset>0 ){
10675     u8 *a = pIter->pLeaf->p;
10676     int iOff;
10677     u64 iDelta;
10678 
10679     pIter->iRowidOffset--;
10680     pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset];
10681     fts5SegIterLoadNPos(p, pIter);
10682     iOff = pIter->iLeafOffset;
10683     if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){
10684       iOff += pIter->nPos;
10685     }
10686     fts5GetVarint(&a[iOff], &iDelta);
10687     pIter->iRowid -= iDelta;
10688   }else{
10689     fts5SegIterReverseNewPage(p, pIter);
10690   }
10691 }
10692 
10693 /*
10694 ** Advance iterator pIter to the next entry.
10695 **
10696 ** This version of fts5SegIterNext() is only used if detail=none and the
10697 ** iterator is not a reverse direction iterator.
10698 */
10699 static void fts5SegIterNext_None(
10700   Fts5Index *p,                   /* FTS5 backend object */
10701   Fts5SegIter *pIter,             /* Iterator to advance */
10702   int *pbNewTerm                  /* OUT: Set for new term */
10703 ){
10704   int iOff;
10705 
10706   assert( p->rc==SQLITE_OK );
10707   assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 );
10708   assert( p->pConfig->eDetail==FTS5_DETAIL_NONE );
10709 
10710   ASSERT_SZLEAF_OK(pIter->pLeaf);
10711   iOff = pIter->iLeafOffset;
10712 
10713   /* Next entry is on the next page */
10714   if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){
10715     fts5SegIterNextPage(p, pIter);
10716     if( p->rc || pIter->pLeaf==0 ) return;
10717     pIter->iRowid = 0;
10718     iOff = 4;
10719   }
10720 
10721   if( iOff<pIter->iEndofDoclist ){
10722     /* Next entry is on the current page */
10723     i64 iDelta;
10724     iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta);
10725     pIter->iLeafOffset = iOff;
10726     pIter->iRowid += iDelta;
10727   }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){
10728     if( pIter->pSeg ){
10729       int nKeep = 0;
10730       if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){
10731         iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep);
10732       }
10733       pIter->iLeafOffset = iOff;
10734       fts5SegIterLoadTerm(p, pIter, nKeep);
10735     }else{
10736       const u8 *pList = 0;
10737       const char *zTerm = 0;
10738       int nList;
10739       sqlite3Fts5HashScanNext(p->pHash);
10740       sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
10741       if( pList==0 ) goto next_none_eof;
10742       pIter->pLeaf->p = (u8*)pList;
10743       pIter->pLeaf->nn = nList;
10744       pIter->pLeaf->szLeaf = nList;
10745       pIter->iEndofDoclist = nList;
10746       sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm);
10747       pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
10748     }
10749 
10750     if( pbNewTerm ) *pbNewTerm = 1;
10751   }else{
10752     goto next_none_eof;
10753   }
10754 
10755   fts5SegIterLoadNPos(p, pIter);
10756 
10757   return;
10758  next_none_eof:
10759   fts5DataRelease(pIter->pLeaf);
10760   pIter->pLeaf = 0;
10761 }
10762 
10763 
10764 /*
10765 ** Advance iterator pIter to the next entry.
10766 **
10767 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It
10768 ** is not considered an error if the iterator reaches EOF. If an error has
10769 ** already occurred when this function is called, it is a no-op.
10770 */
10771 static void fts5SegIterNext(
10772   Fts5Index *p,                   /* FTS5 backend object */
10773   Fts5SegIter *pIter,             /* Iterator to advance */
10774   int *pbNewTerm                  /* OUT: Set for new term */
10775 ){
10776   Fts5Data *pLeaf = pIter->pLeaf;
10777   int iOff;
10778   int bNewTerm = 0;
10779   int nKeep = 0;
10780   u8 *a;
10781   int n;
10782 
10783   assert( pbNewTerm==0 || *pbNewTerm==0 );
10784   assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
10785 
10786   /* Search for the end of the position list within the current page. */
10787   a = pLeaf->p;
10788   n = pLeaf->szLeaf;
10789 
10790   ASSERT_SZLEAF_OK(pLeaf);
10791   iOff = pIter->iLeafOffset + pIter->nPos;
10792 
10793   if( iOff<n ){
10794     /* The next entry is on the current page. */
10795     assert_nc( iOff<=pIter->iEndofDoclist );
10796     if( iOff>=pIter->iEndofDoclist ){
10797       bNewTerm = 1;
10798       if( iOff!=fts5LeafFirstTermOff(pLeaf) ){
10799         iOff += fts5GetVarint32(&a[iOff], nKeep);
10800       }
10801     }else{
10802       u64 iDelta;
10803       iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta);
10804       pIter->iRowid += iDelta;
10805       assert_nc( iDelta>0 );
10806     }
10807     pIter->iLeafOffset = iOff;
10808 
10809   }else if( pIter->pSeg==0 ){
10810     const u8 *pList = 0;
10811     const char *zTerm = 0;
10812     int nList = 0;
10813     assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm );
10814     if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){
10815       sqlite3Fts5HashScanNext(p->pHash);
10816       sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList);
10817     }
10818     if( pList==0 ){
10819       fts5DataRelease(pIter->pLeaf);
10820       pIter->pLeaf = 0;
10821     }else{
10822       pIter->pLeaf->p = (u8*)pList;
10823       pIter->pLeaf->nn = nList;
10824       pIter->pLeaf->szLeaf = nList;
10825       pIter->iEndofDoclist = nList+1;
10826       sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm),
10827           (u8*)zTerm);
10828       pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid);
10829       *pbNewTerm = 1;
10830     }
10831   }else{
10832     iOff = 0;
10833     /* Next entry is not on the current page */
10834     while( iOff==0 ){
10835       fts5SegIterNextPage(p, pIter);
10836       pLeaf = pIter->pLeaf;
10837       if( pLeaf==0 ) break;
10838       ASSERT_SZLEAF_OK(pLeaf);
10839       if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){
10840         iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid);
10841         pIter->iLeafOffset = iOff;
10842 
10843         if( pLeaf->nn>pLeaf->szLeaf ){
10844           pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
10845               &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist
10846           );
10847         }
10848       }
10849       else if( pLeaf->nn>pLeaf->szLeaf ){
10850         pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32(
10851             &pLeaf->p[pLeaf->szLeaf], iOff
10852         );
10853         pIter->iLeafOffset = iOff;
10854         pIter->iEndofDoclist = iOff;
10855         bNewTerm = 1;
10856       }
10857       assert_nc( iOff<pLeaf->szLeaf );
10858       if( iOff>pLeaf->szLeaf ){
10859         p->rc = FTS5_CORRUPT;
10860         return;
10861       }
10862     }
10863   }
10864 
10865   /* Check if the iterator is now at EOF. If so, return early. */
10866   if( pIter->pLeaf ){
10867     if( bNewTerm ){
10868       if( pIter->flags & FTS5_SEGITER_ONETERM ){
10869         fts5DataRelease(pIter->pLeaf);
10870         pIter->pLeaf = 0;
10871       }else{
10872         fts5SegIterLoadTerm(p, pIter, nKeep);
10873         fts5SegIterLoadNPos(p, pIter);
10874         if( pbNewTerm ) *pbNewTerm = 1;
10875       }
10876     }else{
10877       /* The following could be done by calling fts5SegIterLoadNPos(). But
10878       ** this block is particularly performance critical, so equivalent
10879       ** code is inlined.  */
10880       int nSz;
10881       assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn );
10882       fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz);
10883       pIter->bDel = (nSz & 0x0001);
10884       pIter->nPos = nSz>>1;
10885       assert_nc( pIter->nPos>=0 );
10886     }
10887   }
10888 }
10889 
/*
** Exchange the values of a and b, both of which must be modifiable lvalues
** of type T. The arguments are not parenthesized and each is evaluated
** more than once, so they should be simple expressions without side
** effects.
*/
#define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; }

/*
** Advance integer lvalue iOff past the varint that begins at a[iOff].
** Bytes are consumed up to and including the first one without the 0x80
** bit set, but never more than 9 bytes in total. iOff is modified in place
** and evaluated multiple times.
*/
#define fts5IndexSkipVarint(a, iOff) {            \
  int iEnd = iOff+9;                              \
  while( (a[iOff++] & 0x80) && iOff<iEnd );       \
}
10896 
10897 /*
10898 ** Iterator pIter currently points to the first rowid in a doclist. This
10899 ** function sets the iterator up so that iterates in reverse order through
10900 ** the doclist.
10901 */
10902 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){
10903   Fts5DlidxIter *pDlidx = pIter->pDlidx;
10904   Fts5Data *pLast = 0;
10905   int pgnoLast = 0;
10906 
10907   if( pDlidx ){
10908     int iSegid = pIter->pSeg->iSegid;
10909     pgnoLast = fts5DlidxIterPgno(pDlidx);
10910     pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast));
10911   }else{
10912     Fts5Data *pLeaf = pIter->pLeaf;         /* Current leaf data */
10913 
10914     /* Currently, Fts5SegIter.iLeafOffset points to the first byte of
10915     ** position-list content for the current rowid. Back it up so that it
10916     ** points to the start of the position-list size field. */
10917     int iPoslist;
10918     if( pIter->iTermLeafPgno==pIter->iLeafPgno ){
10919       iPoslist = pIter->iTermLeafOffset;
10920     }else{
10921       iPoslist = 4;
10922     }
10923     fts5IndexSkipVarint(pLeaf->p, iPoslist);
10924     pIter->iLeafOffset = iPoslist;
10925 
10926     /* If this condition is true then the largest rowid for the current
10927     ** term may not be stored on the current page. So search forward to
10928     ** see where said rowid really is.  */
10929     if( pIter->iEndofDoclist>=pLeaf->szLeaf ){
10930       int pgno;
10931       Fts5StructureSegment *pSeg = pIter->pSeg;
10932 
10933       /* The last rowid in the doclist may not be on the current page. Search
10934       ** forward to find the page containing the last rowid.  */
10935       for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){
10936         i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno);
10937         Fts5Data *pNew = fts5LeafRead(p, iAbs);
10938         if( pNew ){
10939           int iRowid, bTermless;
10940           iRowid = fts5LeafFirstRowidOff(pNew);
10941           bTermless = fts5LeafIsTermless(pNew);
10942           if( iRowid ){
10943             SWAPVAL(Fts5Data*, pNew, pLast);
10944             pgnoLast = pgno;
10945           }
10946           fts5DataRelease(pNew);
10947           if( bTermless==0 ) break;
10948         }
10949       }
10950     }
10951   }
10952 
10953   /* If pLast is NULL at this point, then the last rowid for this doclist
10954   ** lies on the page currently indicated by the iterator. In this case
10955   ** pIter->iLeafOffset is already set to point to the position-list size
10956   ** field associated with the first relevant rowid on the page.
10957   **
10958   ** Or, if pLast is non-NULL, then it is the page that contains the last
10959   ** rowid. In this case configure the iterator so that it points to the
10960   ** first rowid on this page.
10961   */
10962   if( pLast ){
10963     int iOff;
10964     fts5DataRelease(pIter->pLeaf);
10965     pIter->pLeaf = pLast;
10966     pIter->iLeafPgno = pgnoLast;
10967     iOff = fts5LeafFirstRowidOff(pLast);
10968     if( iOff>pLast->szLeaf ){
10969       p->rc = FTS5_CORRUPT;
10970       return;
10971     }
10972     iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid);
10973     pIter->iLeafOffset = iOff;
10974 
10975     if( fts5LeafIsTermless(pLast) ){
10976       pIter->iEndofDoclist = pLast->nn+1;
10977     }else{
10978       pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast);
10979     }
10980   }
10981 
10982   fts5SegIterReverseInitPage(p, pIter);
10983 }
10984 
10985 /*
10986 ** Iterator pIter currently points to the first rowid of a doclist.
10987 ** There is a doclist-index associated with the final term on the current
10988 ** page. If the current term is the last term on the page, load the
10989 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx).
10990 */
10991 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){
10992   int iSeg = pIter->pSeg->iSegid;
10993   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
10994   Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
10995 
10996   assert( pIter->flags & FTS5_SEGITER_ONETERM );
10997   assert( pIter->pDlidx==0 );
10998 
10999   /* Check if the current doclist ends on this page. If it does, return
11000   ** early without loading the doclist-index (as it belongs to a different
11001   ** term. */
11002   if( pIter->iTermLeafPgno==pIter->iLeafPgno
11003    && pIter->iEndofDoclist<pLeaf->szLeaf
11004   ){
11005     return;
11006   }
11007 
11008   pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno);
11009 }
11010 
11011 /*
11012 ** The iterator object passed as the second argument currently contains
11013 ** no valid values except for the Fts5SegIter.pLeaf member variable. This
11014 ** function searches the leaf page for a term matching (pTerm/nTerm).
11015 **
11016 ** If the specified term is found on the page, then the iterator is left
11017 ** pointing to it. If argument bGe is zero and the term is not found,
11018 ** the iterator is left pointing at EOF.
11019 **
11020 ** If bGe is non-zero and the specified term is not found, then the
11021 ** iterator is left pointing to the smallest term in the segment that
11022 ** is larger than the specified term, even if this term is not on the
11023 ** current page.
11024 */
11025 static void fts5LeafSeek(
11026   Fts5Index *p,                   /* Leave any error code here */
11027   int bGe,                        /* True for a >= search */
11028   Fts5SegIter *pIter,             /* Iterator to seek */
11029   const u8 *pTerm, int nTerm      /* Term to search for */
11030 ){
11031   u32 iOff;
11032   const u8 *a = pIter->pLeaf->p;
11033   u32 n = (u32)pIter->pLeaf->nn;
11034 
11035   u32 nMatch = 0;
11036   u32 nKeep = 0;
11037   u32 nNew = 0;
11038   u32 iTermOff;
11039   u32 iPgidx;                     /* Current offset in pgidx */
11040   int bEndOfPage = 0;
11041 
11042   assert( p->rc==SQLITE_OK );
11043 
11044   iPgidx = (u32)pIter->pLeaf->szLeaf;
11045   iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff);
11046   iOff = iTermOff;
11047   if( iOff>n ){
11048     p->rc = FTS5_CORRUPT;
11049     return;
11050   }
11051 
11052   while( 1 ){
11053 
11054     /* Figure out how many new bytes are in this term */
11055     fts5FastGetVarint32(a, iOff, nNew);
11056     if( nKeep<nMatch ){
11057       goto search_failed;
11058     }
11059 
11060     assert( nKeep>=nMatch );
11061     if( nKeep==nMatch ){
11062       u32 nCmp;
11063       u32 i;
11064       nCmp = (u32)MIN(nNew, nTerm-nMatch);
11065       for(i=0; i<nCmp; i++){
11066         if( a[iOff+i]!=pTerm[nMatch+i] ) break;
11067       }
11068       nMatch += i;
11069 
11070       if( (u32)nTerm==nMatch ){
11071         if( i==nNew ){
11072           goto search_success;
11073         }else{
11074           goto search_failed;
11075         }
11076       }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){
11077         goto search_failed;
11078       }
11079     }
11080 
11081     if( iPgidx>=n ){
11082       bEndOfPage = 1;
11083       break;
11084     }
11085 
11086     iPgidx += fts5GetVarint32(&a[iPgidx], nKeep);
11087     iTermOff += nKeep;
11088     iOff = iTermOff;
11089 
11090     if( iOff>=n ){
11091       p->rc = FTS5_CORRUPT;
11092       return;
11093     }
11094 
11095     /* Read the nKeep field of the next term. */
11096     fts5FastGetVarint32(a, iOff, nKeep);
11097   }
11098 
11099  search_failed:
11100   if( bGe==0 ){
11101     fts5DataRelease(pIter->pLeaf);
11102     pIter->pLeaf = 0;
11103     return;
11104   }else if( bEndOfPage ){
11105     do {
11106       fts5SegIterNextPage(p, pIter);
11107       if( pIter->pLeaf==0 ) return;
11108       a = pIter->pLeaf->p;
11109       if( fts5LeafIsTermless(pIter->pLeaf)==0 ){
11110         iPgidx = (u32)pIter->pLeaf->szLeaf;
11111         iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff);
11112         if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){
11113           p->rc = FTS5_CORRUPT;
11114           return;
11115         }else{
11116           nKeep = 0;
11117           iTermOff = iOff;
11118           n = (u32)pIter->pLeaf->nn;
11119           iOff += fts5GetVarint32(&a[iOff], nNew);
11120           break;
11121         }
11122       }
11123     }while( 1 );
11124   }
11125 
11126  search_success:
11127   if( (i64)iOff+nNew>n || nNew<1 ){
11128     p->rc = FTS5_CORRUPT;
11129     return;
11130   }
11131   pIter->iLeafOffset = iOff + nNew;
11132   pIter->iTermLeafOffset = pIter->iLeafOffset;
11133   pIter->iTermLeafPgno = pIter->iLeafPgno;
11134 
11135   fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm);
11136   fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]);
11137 
11138   if( iPgidx>=n ){
11139     pIter->iEndofDoclist = pIter->pLeaf->nn+1;
11140   }else{
11141     int nExtra;
11142     iPgidx += fts5GetVarint32(&a[iPgidx], nExtra);
11143     pIter->iEndofDoclist = iTermOff + nExtra;
11144   }
11145   pIter->iPgidxOff = iPgidx;
11146 
11147   fts5SegIterLoadRowid(p, pIter);
11148   fts5SegIterLoadNPos(p, pIter);
11149 }
11150 
11151 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){
11152   if( p->pIdxSelect==0 ){
11153     Fts5Config *pConfig = p->pConfig;
11154     fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
11155           "SELECT pgno FROM '%q'.'%q_idx' WHERE "
11156           "segid=? AND term<=? ORDER BY term DESC LIMIT 1",
11157           pConfig->zDb, pConfig->zName
11158     ));
11159   }
11160   return p->pIdxSelect;
11161 }
11162 
11163 /*
11164 ** Initialize the object pIter to point to term pTerm/nTerm within segment
11165 ** pSeg. If there is no such term in the index, the iterator is set to EOF.
11166 **
11167 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
11168 ** an error has already occurred when this function is called, it is a no-op.
11169 */
11170 static void fts5SegIterSeekInit(
11171   Fts5Index *p,                   /* FTS5 backend */
11172   const u8 *pTerm, int nTerm,     /* Term to seek to */
11173   int flags,                      /* Mask of FTS5INDEX_XXX flags */
11174   Fts5StructureSegment *pSeg,     /* Description of segment */
11175   Fts5SegIter *pIter              /* Object to populate */
11176 ){
11177   int iPg = 1;
11178   int bGe = (flags & FTS5INDEX_QUERY_SCAN);
11179   int bDlidx = 0;                 /* True if there is a doclist-index */
11180   sqlite3_stmt *pIdxSelect = 0;
11181 
11182   assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 );
11183   assert( pTerm && nTerm );
11184   memset(pIter, 0, sizeof(*pIter));
11185   pIter->pSeg = pSeg;
11186 
11187   /* This block sets stack variable iPg to the leaf page number that may
11188   ** contain term (pTerm/nTerm), if it is present in the segment. */
11189   pIdxSelect = fts5IdxSelectStmt(p);
11190   if( p->rc ) return;
11191   sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid);
11192   sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
11193   if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){
11194     i64 val = sqlite3_column_int(pIdxSelect, 0);
11195     iPg = (int)(val>>1);
11196     bDlidx = (val & 0x0001);
11197   }
11198   p->rc = sqlite3_reset(pIdxSelect);
11199   sqlite3_bind_null(pIdxSelect, 2);
11200 
11201   if( iPg<pSeg->pgnoFirst ){
11202     iPg = pSeg->pgnoFirst;
11203     bDlidx = 0;
11204   }
11205 
11206   pIter->iLeafPgno = iPg - 1;
11207   fts5SegIterNextPage(p, pIter);
11208 
11209   if( pIter->pLeaf ){
11210     fts5LeafSeek(p, bGe, pIter, pTerm, nTerm);
11211   }
11212 
11213   if( p->rc==SQLITE_OK && bGe==0 ){
11214     pIter->flags |= FTS5_SEGITER_ONETERM;
11215     if( pIter->pLeaf ){
11216       if( flags & FTS5INDEX_QUERY_DESC ){
11217         pIter->flags |= FTS5_SEGITER_REVERSE;
11218       }
11219       if( bDlidx ){
11220         fts5SegIterLoadDlidx(p, pIter);
11221       }
11222       if( flags & FTS5INDEX_QUERY_DESC ){
11223         fts5SegIterReverse(p, pIter);
11224       }
11225     }
11226   }
11227 
11228   fts5SegIterSetNext(p, pIter);
11229 
11230   /* Either:
11231   **
11232   **   1) an error has occurred, or
11233   **   2) the iterator points to EOF, or
11234   **   3) the iterator points to an entry with term (pTerm/nTerm), or
11235   **   4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points
11236   **      to an entry with a term greater than or equal to (pTerm/nTerm).
11237   */
11238   assert_nc( p->rc!=SQLITE_OK                                       /* 1 */
11239    || pIter->pLeaf==0                                               /* 2 */
11240    || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0          /* 3 */
11241    || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0)  /* 4 */
11242   );
11243 }
11244 
11245 /*
11246 ** Initialize the object pIter to point to term pTerm/nTerm within the
11247 ** in-memory hash table. If there is no such term in the hash-table, the
11248 ** iterator is set to EOF.
11249 **
11250 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If
11251 ** an error has already occurred when this function is called, it is a no-op.
11252 */
11253 static void fts5SegIterHashInit(
11254   Fts5Index *p,                   /* FTS5 backend */
11255   const u8 *pTerm, int nTerm,     /* Term to seek to */
11256   int flags,                      /* Mask of FTS5INDEX_XXX flags */
11257   Fts5SegIter *pIter              /* Object to populate */
11258 ){
11259   int nList = 0;
11260   const u8 *z = 0;
11261   int n = 0;
11262   Fts5Data *pLeaf = 0;
11263 
11264   assert( p->pHash );
11265   assert( p->rc==SQLITE_OK );
11266 
11267   if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){
11268     const u8 *pList = 0;
11269 
11270     p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm);
11271     sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList);
11272     n = (z ? (int)strlen((const char*)z) : 0);
11273     if( pList ){
11274       pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data));
11275       if( pLeaf ){
11276         pLeaf->p = (u8*)pList;
11277       }
11278     }
11279   }else{
11280     p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data),
11281         (const char*)pTerm, nTerm, (void**)&pLeaf, &nList
11282     );
11283     if( pLeaf ){
11284       pLeaf->p = (u8*)&pLeaf[1];
11285     }
11286     z = pTerm;
11287     n = nTerm;
11288     pIter->flags |= FTS5_SEGITER_ONETERM;
11289   }
11290 
11291   if( pLeaf ){
11292     sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z);
11293     pLeaf->nn = pLeaf->szLeaf = nList;
11294     pIter->pLeaf = pLeaf;
11295     pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid);
11296     pIter->iEndofDoclist = pLeaf->nn;
11297 
11298     if( flags & FTS5INDEX_QUERY_DESC ){
11299       pIter->flags |= FTS5_SEGITER_REVERSE;
11300       fts5SegIterReverseInitPage(p, pIter);
11301     }else{
11302       fts5SegIterLoadNPos(p, pIter);
11303     }
11304   }
11305 
11306   fts5SegIterSetNext(p, pIter);
11307 }
11308 
11309 /*
11310 ** Zero the iterator passed as the only argument.
11311 */
11312 static void fts5SegIterClear(Fts5SegIter *pIter){
11313   fts5BufferFree(&pIter->term);
11314   fts5DataRelease(pIter->pLeaf);
11315   fts5DataRelease(pIter->pNextLeaf);
11316   fts5DlidxIterFree(pIter->pDlidx);
11317   sqlite3_free(pIter->aRowidOffset);
11318   memset(pIter, 0, sizeof(Fts5SegIter));
11319 }
11320 
11321 #ifdef SQLITE_DEBUG
11322 
11323 /*
11324 ** This function is used as part of the big assert() procedure implemented by
11325 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored
11326 ** in *pRes is the correct result of comparing the current positions of the
11327 ** two iterators.
11328 */
11329 static void fts5AssertComparisonResult(
11330   Fts5Iter *pIter,
11331   Fts5SegIter *p1,
11332   Fts5SegIter *p2,
11333   Fts5CResult *pRes
11334 ){
11335   int i1 = p1 - pIter->aSeg;
11336   int i2 = p2 - pIter->aSeg;
11337 
11338   if( p1->pLeaf || p2->pLeaf ){
11339     if( p1->pLeaf==0 ){
11340       assert( pRes->iFirst==i2 );
11341     }else if( p2->pLeaf==0 ){
11342       assert( pRes->iFirst==i1 );
11343     }else{
11344       int nMin = MIN(p1->term.n, p2->term.n);
11345       int res = fts5Memcmp(p1->term.p, p2->term.p, nMin);
11346       if( res==0 ) res = p1->term.n - p2->term.n;
11347 
11348       if( res==0 ){
11349         assert( pRes->bTermEq==1 );
11350         assert( p1->iRowid!=p2->iRowid );
11351         res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1;
11352       }else{
11353         assert( pRes->bTermEq==0 );
11354       }
11355 
11356       if( res<0 ){
11357         assert( pRes->iFirst==i1 );
11358       }else{
11359         assert( pRes->iFirst==i2 );
11360       }
11361     }
11362   }
11363 }
11364 
11365 /*
11366 ** This function is a no-op unless SQLITE_DEBUG is defined when this module
11367 ** is compiled. In that case, this function is essentially an assert()
11368 ** statement used to verify that the contents of the pIter->aFirst[] array
11369 ** are correct.
11370 */
11371 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){
11372   if( p->rc==SQLITE_OK ){
11373     Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
11374     int i;
11375 
11376     assert( (pFirst->pLeaf==0)==pIter->base.bEof );
11377 
11378     /* Check that pIter->iSwitchRowid is set correctly. */
11379     for(i=0; i<pIter->nSeg; i++){
11380       Fts5SegIter *p1 = &pIter->aSeg[i];
11381       assert( p1==pFirst
11382            || p1->pLeaf==0
11383            || fts5BufferCompare(&pFirst->term, &p1->term)
11384            || p1->iRowid==pIter->iSwitchRowid
11385            || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev
11386       );
11387     }
11388 
11389     for(i=0; i<pIter->nSeg; i+=2){
11390       Fts5SegIter *p1 = &pIter->aSeg[i];
11391       Fts5SegIter *p2 = &pIter->aSeg[i+1];
11392       Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2];
11393       fts5AssertComparisonResult(pIter, p1, p2, pRes);
11394     }
11395 
11396     for(i=1; i<(pIter->nSeg / 2); i+=2){
11397       Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ];
11398       Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ];
11399       Fts5CResult *pRes = &pIter->aFirst[i];
11400       fts5AssertComparisonResult(pIter, p1, p2, pRes);
11401     }
11402   }
11403 }
11404 #else
11405 # define fts5AssertMultiIterSetup(x,y)
11406 #endif
11407 
11408 /*
11409 ** Do the comparison necessary to populate pIter->aFirst[iOut].
11410 **
11411 ** If the returned value is non-zero, then it is the index of an entry
11412 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing
11413 ** to a key that is a duplicate of another, higher priority,
11414 ** segment-iterator in the pSeg->aSeg[] array.
11415 */
11416 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){
11417   int i1;                         /* Index of left-hand Fts5SegIter */
11418   int i2;                         /* Index of right-hand Fts5SegIter */
11419   int iRes;
11420   Fts5SegIter *p1;                /* Left-hand Fts5SegIter */
11421   Fts5SegIter *p2;                /* Right-hand Fts5SegIter */
11422   Fts5CResult *pRes = &pIter->aFirst[iOut];
11423 
11424   assert( iOut<pIter->nSeg && iOut>0 );
11425   assert( pIter->bRev==0 || pIter->bRev==1 );
11426 
11427   if( iOut>=(pIter->nSeg/2) ){
11428     i1 = (iOut - pIter->nSeg/2) * 2;
11429     i2 = i1 + 1;
11430   }else{
11431     i1 = pIter->aFirst[iOut*2].iFirst;
11432     i2 = pIter->aFirst[iOut*2+1].iFirst;
11433   }
11434   p1 = &pIter->aSeg[i1];
11435   p2 = &pIter->aSeg[i2];
11436 
11437   pRes->bTermEq = 0;
11438   if( p1->pLeaf==0 ){           /* If p1 is at EOF */
11439     iRes = i2;
11440   }else if( p2->pLeaf==0 ){     /* If p2 is at EOF */
11441     iRes = i1;
11442   }else{
11443     int res = fts5BufferCompare(&p1->term, &p2->term);
11444     if( res==0 ){
11445       assert_nc( i2>i1 );
11446       assert_nc( i2!=0 );
11447       pRes->bTermEq = 1;
11448       if( p1->iRowid==p2->iRowid ){
11449         p1->bDel = p2->bDel;
11450         return i2;
11451       }
11452       res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1;
11453     }
11454     assert( res!=0 );
11455     if( res<0 ){
11456       iRes = i1;
11457     }else{
11458       iRes = i2;
11459     }
11460   }
11461 
11462   pRes->iFirst = (u16)iRes;
11463   return 0;
11464 }
11465 
11466 /*
11467 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno.
11468 ** It is an error if leaf iLeafPgno does not exist or contains no rowids.
11469 */
11470 static void fts5SegIterGotoPage(
11471   Fts5Index *p,                   /* FTS5 backend object */
11472   Fts5SegIter *pIter,             /* Iterator to advance */
11473   int iLeafPgno
11474 ){
11475   assert( iLeafPgno>pIter->iLeafPgno );
11476 
11477   if( iLeafPgno>pIter->pSeg->pgnoLast ){
11478     p->rc = FTS5_CORRUPT;
11479   }else{
11480     fts5DataRelease(pIter->pNextLeaf);
11481     pIter->pNextLeaf = 0;
11482     pIter->iLeafPgno = iLeafPgno-1;
11483     fts5SegIterNextPage(p, pIter);
11484     assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno );
11485 
11486     if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){
11487       int iOff;
11488       u8 *a = pIter->pLeaf->p;
11489       int n = pIter->pLeaf->szLeaf;
11490 
11491       iOff = fts5LeafFirstRowidOff(pIter->pLeaf);
11492       if( iOff<4 || iOff>=n ){
11493         p->rc = FTS5_CORRUPT;
11494       }else{
11495         iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid);
11496         pIter->iLeafOffset = iOff;
11497         fts5SegIterLoadNPos(p, pIter);
11498       }
11499     }
11500   }
11501 }
11502 
11503 /*
11504 ** Advance the iterator passed as the second argument until it is at or
11505 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is
11506 ** always advanced at least once.
11507 */
11508 static void fts5SegIterNextFrom(
11509   Fts5Index *p,                   /* FTS5 backend object */
11510   Fts5SegIter *pIter,             /* Iterator to advance */
11511   i64 iMatch                      /* Advance iterator at least this far */
11512 ){
11513   int bRev = (pIter->flags & FTS5_SEGITER_REVERSE);
11514   Fts5DlidxIter *pDlidx = pIter->pDlidx;
11515   int iLeafPgno = pIter->iLeafPgno;
11516   int bMove = 1;
11517 
11518   assert( pIter->flags & FTS5_SEGITER_ONETERM );
11519   assert( pIter->pDlidx );
11520   assert( pIter->pLeaf );
11521 
11522   if( bRev==0 ){
11523     while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){
11524       iLeafPgno = fts5DlidxIterPgno(pDlidx);
11525       fts5DlidxIterNext(p, pDlidx);
11526     }
11527     assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc );
11528     if( iLeafPgno>pIter->iLeafPgno ){
11529       fts5SegIterGotoPage(p, pIter, iLeafPgno);
11530       bMove = 0;
11531     }
11532   }else{
11533     assert( pIter->pNextLeaf==0 );
11534     assert( iMatch<pIter->iRowid );
11535     while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){
11536       fts5DlidxIterPrev(p, pDlidx);
11537     }
11538     iLeafPgno = fts5DlidxIterPgno(pDlidx);
11539 
11540     assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno );
11541 
11542     if( iLeafPgno<pIter->iLeafPgno ){
11543       pIter->iLeafPgno = iLeafPgno+1;
11544       fts5SegIterReverseNewPage(p, pIter);
11545       bMove = 0;
11546     }
11547   }
11548 
11549   do{
11550     if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0);
11551     if( pIter->pLeaf==0 ) break;
11552     if( bRev==0 && pIter->iRowid>=iMatch ) break;
11553     if( bRev!=0 && pIter->iRowid<=iMatch ) break;
11554     bMove = 1;
11555   }while( p->rc==SQLITE_OK );
11556 }
11557 
11558 
11559 /*
11560 ** Free the iterator object passed as the second argument.
11561 */
11562 static void fts5MultiIterFree(Fts5Iter *pIter){
11563   if( pIter ){
11564     int i;
11565     for(i=0; i<pIter->nSeg; i++){
11566       fts5SegIterClear(&pIter->aSeg[i]);
11567     }
11568     fts5BufferFree(&pIter->poslist);
11569     sqlite3_free(pIter);
11570   }
11571 }
11572 
11573 static void fts5MultiIterAdvanced(
11574   Fts5Index *p,                   /* FTS5 backend to iterate within */
11575   Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
11576   int iChanged,                   /* Index of sub-iterator just advanced */
11577   int iMinset                     /* Minimum entry in aFirst[] to set */
11578 ){
11579   int i;
11580   for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){
11581     int iEq;
11582     if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){
11583       Fts5SegIter *pSeg = &pIter->aSeg[iEq];
11584       assert( p->rc==SQLITE_OK );
11585       pSeg->xNext(p, pSeg, 0);
11586       i = pIter->nSeg + iEq;
11587     }
11588   }
11589 }
11590 
11591 /*
11592 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still
11593 ** points to the same term though - just a different rowid. This function
11594 ** attempts to update the contents of the pIter->aFirst[] accordingly.
11595 ** If it does so successfully, 0 is returned. Otherwise 1.
11596 **
11597 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced()
11598 ** on the iterator instead. That function does the same as this one, except
11599 ** that it deals with more complicated cases as well.
11600 */
11601 static int fts5MultiIterAdvanceRowid(
11602   Fts5Iter *pIter,                /* Iterator to update aFirst[] array for */
11603   int iChanged,                   /* Index of sub-iterator just advanced */
11604   Fts5SegIter **ppFirst
11605 ){
11606   Fts5SegIter *pNew = &pIter->aSeg[iChanged];
11607 
11608   if( pNew->iRowid==pIter->iSwitchRowid
11609    || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev
11610   ){
11611     int i;
11612     Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001];
11613     pIter->iSwitchRowid = pIter->bRev ? SMALLEST_INT64 : LARGEST_INT64;
11614     for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){
11615       Fts5CResult *pRes = &pIter->aFirst[i];
11616 
11617       assert( pNew->pLeaf );
11618       assert( pRes->bTermEq==0 || pOther->pLeaf );
11619 
11620       if( pRes->bTermEq ){
11621         if( pNew->iRowid==pOther->iRowid ){
11622           return 1;
11623         }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){
11624           pIter->iSwitchRowid = pOther->iRowid;
11625           pNew = pOther;
11626         }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){
11627           pIter->iSwitchRowid = pOther->iRowid;
11628         }
11629       }
11630       pRes->iFirst = (u16)(pNew - pIter->aSeg);
11631       if( i==1 ) break;
11632 
11633       pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ];
11634     }
11635   }
11636 
11637   *ppFirst = pNew;
11638   return 0;
11639 }
11640 
11641 /*
11642 ** Set the pIter->bEof variable based on the state of the sub-iterators.
11643 */
11644 static void fts5MultiIterSetEof(Fts5Iter *pIter){
11645   Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
11646   pIter->base.bEof = pSeg->pLeaf==0;
11647   pIter->iSwitchRowid = pSeg->iRowid;
11648 }
11649 
11650 /*
11651 ** Move the iterator to the next entry.
11652 **
11653 ** If an error occurs, an error code is left in Fts5Index.rc. It is not
11654 ** considered an error if the iterator reaches EOF, or if it is already at
11655 ** EOF when this function is called.
11656 */
11657 static void fts5MultiIterNext(
11658   Fts5Index *p,
11659   Fts5Iter *pIter,
11660   int bFrom,                      /* True if argument iFrom is valid */
11661   i64 iFrom                       /* Advance at least as far as this */
11662 ){
11663   int bUseFrom = bFrom;
11664   assert( pIter->base.bEof==0 );
11665   while( p->rc==SQLITE_OK ){
11666     int iFirst = pIter->aFirst[1].iFirst;
11667     int bNewTerm = 0;
11668     Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
11669     assert( p->rc==SQLITE_OK );
11670     if( bUseFrom && pSeg->pDlidx ){
11671       fts5SegIterNextFrom(p, pSeg, iFrom);
11672     }else{
11673       pSeg->xNext(p, pSeg, &bNewTerm);
11674     }
11675 
11676     if( pSeg->pLeaf==0 || bNewTerm
11677      || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
11678     ){
11679       fts5MultiIterAdvanced(p, pIter, iFirst, 1);
11680       fts5MultiIterSetEof(pIter);
11681       pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst];
11682       if( pSeg->pLeaf==0 ) return;
11683     }
11684 
11685     fts5AssertMultiIterSetup(p, pIter);
11686     assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf );
11687     if( pIter->bSkipEmpty==0 || pSeg->nPos ){
11688       pIter->xSetOutputs(pIter, pSeg);
11689       return;
11690     }
11691     bUseFrom = 0;
11692   }
11693 }
11694 
11695 static void fts5MultiIterNext2(
11696   Fts5Index *p,
11697   Fts5Iter *pIter,
11698   int *pbNewTerm                  /* OUT: True if *might* be new term */
11699 ){
11700   assert( pIter->bSkipEmpty );
11701   if( p->rc==SQLITE_OK ){
11702     *pbNewTerm = 0;
11703     do{
11704       int iFirst = pIter->aFirst[1].iFirst;
11705       Fts5SegIter *pSeg = &pIter->aSeg[iFirst];
11706       int bNewTerm = 0;
11707 
11708       assert( p->rc==SQLITE_OK );
11709       pSeg->xNext(p, pSeg, &bNewTerm);
11710       if( pSeg->pLeaf==0 || bNewTerm
11711        || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg)
11712       ){
11713         fts5MultiIterAdvanced(p, pIter, iFirst, 1);
11714         fts5MultiIterSetEof(pIter);
11715         *pbNewTerm = 1;
11716       }
11717       fts5AssertMultiIterSetup(p, pIter);
11718 
11719     }while( fts5MultiIterIsEmpty(p, pIter) );
11720   }
11721 }
11722 
11723 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, Fts5SegIter *pUnused2){
11724   UNUSED_PARAM2(pUnused1, pUnused2);
11725 }
11726 
11727 static Fts5Iter *fts5MultiIterAlloc(
11728   Fts5Index *p,                   /* FTS5 backend to iterate within */
11729   int nSeg
11730 ){
11731   Fts5Iter *pNew;
11732   int nSlot;                      /* Power of two >= nSeg */
11733 
11734   for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2);
11735   pNew = fts5IdxMalloc(p,
11736       sizeof(Fts5Iter) +                  /* pNew */
11737       sizeof(Fts5SegIter) * (nSlot-1) +   /* pNew->aSeg[] */
11738       sizeof(Fts5CResult) * nSlot         /* pNew->aFirst[] */
11739   );
11740   if( pNew ){
11741     pNew->nSeg = nSlot;
11742     pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot];
11743     pNew->pIndex = p;
11744     pNew->xSetOutputs = fts5IterSetOutputs_Noop;
11745   }
11746   return pNew;
11747 }
11748 
11749 static void fts5PoslistCallback(
11750   Fts5Index *pUnused,
11751   void *pContext,
11752   const u8 *pChunk, int nChunk
11753 ){
11754   UNUSED_PARAM(pUnused);
11755   assert_nc( nChunk>=0 );
11756   if( nChunk>0 ){
11757     fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk);
11758   }
11759 }
11760 
11761 typedef struct PoslistCallbackCtx PoslistCallbackCtx;
11762 struct PoslistCallbackCtx {
11763   Fts5Buffer *pBuf;               /* Append to this buffer */
11764   Fts5Colset *pColset;            /* Restrict matches to this column */
11765   int eState;                     /* See above */
11766 };
11767 
11768 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx;
11769 struct PoslistOffsetsCtx {
11770   Fts5Buffer *pBuf;               /* Append to this buffer */
11771   Fts5Colset *pColset;            /* Restrict matches to this column */
11772   int iRead;
11773   int iWrite;
11774 };
11775 
11776 /*
11777 ** TODO: Make this more efficient!
11778 */
11779 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){
11780   int i;
11781   for(i=0; i<pColset->nCol; i++){
11782     if( pColset->aiCol[i]==iCol ) return 1;
11783   }
11784   return 0;
11785 }
11786 
11787 static void fts5PoslistOffsetsCallback(
11788   Fts5Index *pUnused,
11789   void *pContext,
11790   const u8 *pChunk, int nChunk
11791 ){
11792   PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext;
11793   UNUSED_PARAM(pUnused);
11794   assert_nc( nChunk>=0 );
11795   if( nChunk>0 ){
11796     int i = 0;
11797     while( i<nChunk ){
11798       int iVal;
11799       i += fts5GetVarint32(&pChunk[i], iVal);
11800       iVal += pCtx->iRead - 2;
11801       pCtx->iRead = iVal;
11802       if( fts5IndexColsetTest(pCtx->pColset, iVal) ){
11803         fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite);
11804         pCtx->iWrite = iVal;
11805       }
11806     }
11807   }
11808 }
11809 
11810 static void fts5PoslistFilterCallback(
11811   Fts5Index *pUnused,
11812   void *pContext,
11813   const u8 *pChunk, int nChunk
11814 ){
11815   PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext;
11816   UNUSED_PARAM(pUnused);
11817   assert_nc( nChunk>=0 );
11818   if( nChunk>0 ){
11819     /* Search through to find the first varint with value 1. This is the
11820     ** start of the next columns hits. */
11821     int i = 0;
11822     int iStart = 0;
11823 
11824     if( pCtx->eState==2 ){
11825       int iCol;
11826       fts5FastGetVarint32(pChunk, i, iCol);
11827       if( fts5IndexColsetTest(pCtx->pColset, iCol) ){
11828         pCtx->eState = 1;
11829         fts5BufferSafeAppendVarint(pCtx->pBuf, 1);
11830       }else{
11831         pCtx->eState = 0;
11832       }
11833     }
11834 
11835     do {
11836       while( i<nChunk && pChunk[i]!=0x01 ){
11837         while( pChunk[i] & 0x80 ) i++;
11838         i++;
11839       }
11840       if( pCtx->eState ){
11841         fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
11842       }
11843       if( i<nChunk ){
11844         int iCol;
11845         iStart = i;
11846         i++;
11847         if( i>=nChunk ){
11848           pCtx->eState = 2;
11849         }else{
11850           fts5FastGetVarint32(pChunk, i, iCol);
11851           pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol);
11852           if( pCtx->eState ){
11853             fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart);
11854             iStart = i;
11855           }
11856         }
11857       }
11858     }while( i<nChunk );
11859   }
11860 }
11861 
11862 static void fts5ChunkIterate(
11863   Fts5Index *p,                   /* Index object */
11864   Fts5SegIter *pSeg,              /* Poslist of this iterator */
11865   void *pCtx,                     /* Context pointer for xChunk callback */
11866   void (*xChunk)(Fts5Index*, void*, const u8*, int)
11867 ){
11868   int nRem = pSeg->nPos;          /* Number of bytes still to come */
11869   Fts5Data *pData = 0;
11870   u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset];
11871   int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset);
11872   int pgno = pSeg->iLeafPgno;
11873   int pgnoSave = 0;
11874 
11875   /* This function does not work with detail=none databases. */
11876   assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE );
11877 
11878   if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){
11879     pgnoSave = pgno+1;
11880   }
11881 
11882   while( 1 ){
11883     xChunk(p, pCtx, pChunk, nChunk);
11884     nRem -= nChunk;
11885     fts5DataRelease(pData);
11886     if( nRem<=0 ){
11887       break;
11888     }else if( pSeg->pSeg==0 ){
11889       p->rc = FTS5_CORRUPT;
11890       return;
11891     }else{
11892       pgno++;
11893       pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno));
11894       if( pData==0 ) break;
11895       pChunk = &pData->p[4];
11896       nChunk = MIN(nRem, pData->szLeaf - 4);
11897       if( pgno==pgnoSave ){
11898         assert( pSeg->pNextLeaf==0 );
11899         pSeg->pNextLeaf = pData;
11900         pData = 0;
11901       }
11902     }
11903   }
11904 }
11905 
11906 /*
11907 ** Iterator pIter currently points to a valid entry (not EOF). This
11908 ** function appends the position list data for the current entry to
11909 ** buffer pBuf. It does not make a copy of the position-list size
11910 ** field.
11911 */
11912 static void fts5SegiterPoslist(
11913   Fts5Index *p,
11914   Fts5SegIter *pSeg,
11915   Fts5Colset *pColset,
11916   Fts5Buffer *pBuf
11917 ){
11918   assert( pBuf!=0 );
11919   assert( pSeg!=0 );
11920   if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){
11921     assert( pBuf->p!=0 );
11922     assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING );
11923     memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING);
11924     if( pColset==0 ){
11925       fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback);
11926     }else{
11927       if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){
11928         PoslistCallbackCtx sCtx;
11929         sCtx.pBuf = pBuf;
11930         sCtx.pColset = pColset;
11931         sCtx.eState = fts5IndexColsetTest(pColset, 0);
11932         assert( sCtx.eState==0 || sCtx.eState==1 );
11933         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback);
11934       }else{
11935         PoslistOffsetsCtx sCtx;
11936         memset(&sCtx, 0, sizeof(sCtx));
11937         sCtx.pBuf = pBuf;
11938         sCtx.pColset = pColset;
11939         fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback);
11940       }
11941     }
11942   }
11943 }
11944 
11945 /*
11946 ** Parameter pPos points to a buffer containing a position list, size nPos.
11947 ** This function filters it according to pColset (which must be non-NULL)
11948 ** and sets pIter->base.pData/nData to point to the new position list.
11949 ** If memory is required for the new position list, use buffer pIter->poslist.
11950 ** Or, if the new position list is a contiguous subset of the input, set
11951 ** pIter->base.pData/nData to point directly to it.
11952 **
11953 ** This function is a no-op if *pRc is other than SQLITE_OK when it is
11954 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM
11955 ** before returning.
11956 */
11957 static void fts5IndexExtractColset(
11958   int *pRc,
11959   Fts5Colset *pColset,            /* Colset to filter on */
11960   const u8 *pPos, int nPos,       /* Position list */
11961   Fts5Iter *pIter
11962 ){
11963   if( *pRc==SQLITE_OK ){
11964     const u8 *p = pPos;
11965     const u8 *aCopy = p;
11966     const u8 *pEnd = &p[nPos];    /* One byte past end of position list */
11967     int i = 0;
11968     int iCurrent = 0;
11969 
11970     if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){
11971       return;
11972     }
11973 
11974     while( 1 ){
11975       while( pColset->aiCol[i]<iCurrent ){
11976         i++;
11977         if( i==pColset->nCol ){
11978           pIter->base.pData = pIter->poslist.p;
11979           pIter->base.nData = pIter->poslist.n;
11980           return;
11981         }
11982       }
11983 
11984       /* Advance pointer p until it points to pEnd or an 0x01 byte that is
11985       ** not part of a varint */
11986       while( p<pEnd && *p!=0x01 ){
11987         while( *p++ & 0x80 );
11988       }
11989 
11990       if( pColset->aiCol[i]==iCurrent ){
11991         if( pColset->nCol==1 ){
11992           pIter->base.pData = aCopy;
11993           pIter->base.nData = p-aCopy;
11994           return;
11995         }
11996         fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy);
11997       }
11998       if( p>=pEnd ){
11999         pIter->base.pData = pIter->poslist.p;
12000         pIter->base.nData = pIter->poslist.n;
12001         return;
12002       }
12003       aCopy = p++;
12004       iCurrent = *p++;
12005       if( iCurrent & 0x80 ){
12006         p--;
12007         p += fts5GetVarint32(p, iCurrent);
12008       }
12009     }
12010   }
12011 
12012 }
12013 
12014 /*
12015 ** xSetOutputs callback used by detail=none tables.
12016 */
12017 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){
12018   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE );
12019   pIter->base.iRowid = pSeg->iRowid;
12020   pIter->base.nData = pSeg->nPos;
12021 }
12022 
12023 /*
12024 ** xSetOutputs callback used by detail=full and detail=col tables when no
12025 ** column filters are specified.
12026 */
12027 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){
12028   pIter->base.iRowid = pSeg->iRowid;
12029   pIter->base.nData = pSeg->nPos;
12030 
12031   assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE );
12032   assert( pIter->pColset==0 );
12033 
12034   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
12035     /* All data is stored on the current page. Populate the output
12036     ** variables to point into the body of the page object. */
12037     pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset];
12038   }else{
12039     /* The data is distributed over two or more pages. Copy it into the
12040     ** Fts5Iter.poslist buffer and then set the output pointer to point
12041     ** to this buffer.  */
12042     fts5BufferZero(&pIter->poslist);
12043     fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist);
12044     pIter->base.pData = pIter->poslist.p;
12045   }
12046 }
12047 
12048 /*
12049 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match
12050 ** against no columns at all).
12051 */
12052 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){
12053   UNUSED_PARAM(pSeg);
12054   pIter->base.nData = 0;
12055 }
12056 
12057 /*
12058 ** xSetOutputs callback used by detail=col when there is a column filter
12059 ** and there are 100 or more columns. Also called as a fallback from
12060 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page.
12061 */
12062 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){
12063   fts5BufferZero(&pIter->poslist);
12064   fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist);
12065   pIter->base.iRowid = pSeg->iRowid;
12066   pIter->base.pData = pIter->poslist.p;
12067   pIter->base.nData = pIter->poslist.n;
12068 }
12069 
12070 /*
12071 ** xSetOutputs callback used when:
12072 **
12073 **   * detail=col,
12074 **   * there is a column filter, and
12075 **   * the table contains 100 or fewer columns.
12076 **
12077 ** The last point is to ensure all column numbers are stored as
12078 ** single-byte varints.
12079 */
12080 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){
12081 
12082   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS );
12083   assert( pIter->pColset );
12084 
12085   if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){
12086     fts5IterSetOutputs_Col(pIter, pSeg);
12087   }else{
12088     u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset];
12089     u8 *pEnd = (u8*)&a[pSeg->nPos];
12090     int iPrev = 0;
12091     int *aiCol = pIter->pColset->aiCol;
12092     int *aiColEnd = &aiCol[pIter->pColset->nCol];
12093 
12094     u8 *aOut = pIter->poslist.p;
12095     int iPrevOut = 0;
12096 
12097     pIter->base.iRowid = pSeg->iRowid;
12098 
12099     while( a<pEnd ){
12100       iPrev += (int)a++[0] - 2;
12101       while( *aiCol<iPrev ){
12102         aiCol++;
12103         if( aiCol==aiColEnd ) goto setoutputs_col_out;
12104       }
12105       if( *aiCol==iPrev ){
12106         *aOut++ = (u8)((iPrev - iPrevOut) + 2);
12107         iPrevOut = iPrev;
12108       }
12109     }
12110 
12111 setoutputs_col_out:
12112     pIter->base.pData = pIter->poslist.p;
12113     pIter->base.nData = aOut - pIter->poslist.p;
12114   }
12115 }
12116 
12117 /*
12118 ** xSetOutputs callback used by detail=full when there is a column filter.
12119 */
12120 static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
12121   Fts5Colset *pColset = pIter->pColset;
12122   pIter->base.iRowid = pSeg->iRowid;
12123 
12124   assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
12125   assert( pColset );
12126 
12127   if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
12128     /* All data is stored on the current page. Populate the output
12129     ** variables to point into the body of the page object. */
12130     const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
12131     int *pRc = &pIter->pIndex->rc;
12132     fts5BufferZero(&pIter->poslist);
12133     fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
12134   }else{
12135     /* The data is distributed over two or more pages. Copy it into the
12136     ** Fts5Iter.poslist buffer and then set the output pointer to point
12137     ** to this buffer.  */
12138     fts5BufferZero(&pIter->poslist);
12139     fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
12140     pIter->base.pData = pIter->poslist.p;
12141     pIter->base.nData = pIter->poslist.n;
12142   }
12143 }
12144 
12145 static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
12146   assert( pIter!=0 || (*pRc)!=SQLITE_OK );
12147   if( *pRc==SQLITE_OK ){
12148     Fts5Config *pConfig = pIter->pIndex->pConfig;
12149     if( pConfig->eDetail==FTS5_DETAIL_NONE ){
12150       pIter->xSetOutputs = fts5IterSetOutputs_None;
12151     }
12152 
12153     else if( pIter->pColset==0 ){
12154       pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
12155     }
12156 
12157     else if( pIter->pColset->nCol==0 ){
12158       pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
12159     }
12160 
12161     else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
12162       pIter->xSetOutputs = fts5IterSetOutputs_Full;
12163     }
12164 
12165     else{
12166       assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
12167       if( pConfig->nCol<=100 ){
12168         pIter->xSetOutputs = fts5IterSetOutputs_Col100;
12169         sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
12170       }else{
12171         pIter->xSetOutputs = fts5IterSetOutputs_Col;
12172       }
12173     }
12174   }
12175 }
12176 
12177 
12178 /*
12179 ** Allocate a new Fts5Iter object.
12180 **
12181 ** The new object will be used to iterate through data in structure pStruct.
12182 ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
12183 ** is zero or greater, data from the first nSegment segments on level iLevel
12184 ** is merged.
12185 **
12186 ** The iterator initially points to the first term/rowid entry in the
12187 ** iterated data.
12188 */
12189 static void fts5MultiIterNew(
12190   Fts5Index *p,                   /* FTS5 backend to iterate within */
12191   Fts5Structure *pStruct,         /* Structure of specific index */
12192   int flags,                      /* FTS5INDEX_QUERY_XXX flags */
12193   Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
12194   const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
12195   int iLevel,                     /* Level to iterate (-1 for all) */
12196   int nSegment,                   /* Number of segments to merge (iLevel>=0) */
12197   Fts5Iter **ppOut                /* New object */
12198 ){
12199   int nSeg = 0;                   /* Number of segment-iters in use */
12200   int iIter = 0;                  /* */
12201   int iSeg;                       /* Used to iterate through segments */
12202   Fts5StructureLevel *pLvl;
12203   Fts5Iter *pNew;
12204 
12205   assert( (pTerm==0 && nTerm==0) || iLevel<0 );
12206 
12207   /* Allocate space for the new multi-seg-iterator. */
12208   if( p->rc==SQLITE_OK ){
12209     if( iLevel<0 ){
12210       assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
12211       nSeg = pStruct->nSegment;
12212       nSeg += (p->pHash ? 1 : 0);
12213     }else{
12214       nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
12215     }
12216   }
12217   *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
12218   if( pNew==0 ){
12219     assert( p->rc!=SQLITE_OK );
12220     goto fts5MultiIterNew_post_check;
12221   }
12222   pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
12223   pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
12224   pNew->pColset = pColset;
12225   if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
12226     fts5IterSetOutputCb(&p->rc, pNew);
12227   }
12228 
12229   /* Initialize each of the component segment iterators. */
12230   if( p->rc==SQLITE_OK ){
12231     if( iLevel<0 ){
12232       Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
12233       if( p->pHash ){
12234         /* Add a segment iterator for the current contents of the hash table. */
12235         Fts5SegIter *pIter = &pNew->aSeg[iIter++];
12236         fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
12237       }
12238       for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
12239         for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
12240           Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
12241           Fts5SegIter *pIter = &pNew->aSeg[iIter++];
12242           if( pTerm==0 ){
12243             fts5SegIterInit(p, pSeg, pIter);
12244           }else{
12245             fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
12246           }
12247         }
12248       }
12249     }else{
12250       pLvl = &pStruct->aLevel[iLevel];
12251       for(iSeg=nSeg-1; iSeg>=0; iSeg--){
12252         fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
12253       }
12254     }
12255     assert( iIter==nSeg );
12256   }
12257 
12258   /* If the above was successful, each component iterators now points
12259   ** to the first entry in its segment. In this case initialize the
12260   ** aFirst[] array. Or, if an error has occurred, free the iterator
12261   ** object and set the output variable to NULL.  */
12262   if( p->rc==SQLITE_OK ){
12263     for(iIter=pNew->nSeg-1; iIter>0; iIter--){
12264       int iEq;
12265       if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
12266         Fts5SegIter *pSeg = &pNew->aSeg[iEq];
12267         if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
12268         fts5MultiIterAdvanced(p, pNew, iEq, iIter);
12269       }
12270     }
12271     fts5MultiIterSetEof(pNew);
12272     fts5AssertMultiIterSetup(p, pNew);
12273 
12274     if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
12275       fts5MultiIterNext(p, pNew, 0, 0);
12276     }else if( pNew->base.bEof==0 ){
12277       Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
12278       pNew->xSetOutputs(pNew, pSeg);
12279     }
12280 
12281   }else{
12282     fts5MultiIterFree(pNew);
12283     *ppOut = 0;
12284   }
12285 
12286 fts5MultiIterNew_post_check:
12287   assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
12288   return;
12289 }
12290 
12291 /*
12292 ** Create an Fts5Iter that iterates through the doclist provided
12293 ** as the second argument.
12294 */
12295 static void fts5MultiIterNew2(
12296   Fts5Index *p,                   /* FTS5 backend to iterate within */
12297   Fts5Data *pData,                /* Doclist to iterate through */
12298   int bDesc,                      /* True for descending rowid order */
12299   Fts5Iter **ppOut                /* New object */
12300 ){
12301   Fts5Iter *pNew;
12302   pNew = fts5MultiIterAlloc(p, 2);
12303   if( pNew ){
12304     Fts5SegIter *pIter = &pNew->aSeg[1];
12305 
12306     pIter->flags = FTS5_SEGITER_ONETERM;
12307     if( pData->szLeaf>0 ){
12308       pIter->pLeaf = pData;
12309       pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
12310       pIter->iEndofDoclist = pData->nn;
12311       pNew->aFirst[1].iFirst = 1;
12312       if( bDesc ){
12313         pNew->bRev = 1;
12314         pIter->flags |= FTS5_SEGITER_REVERSE;
12315         fts5SegIterReverseInitPage(p, pIter);
12316       }else{
12317         fts5SegIterLoadNPos(p, pIter);
12318       }
12319       pData = 0;
12320     }else{
12321       pNew->base.bEof = 1;
12322     }
12323     fts5SegIterSetNext(p, pIter);
12324 
12325     *ppOut = pNew;
12326   }
12327 
12328   fts5DataRelease(pData);
12329 }
12330 
12331 /*
12332 ** Return true if the iterator is at EOF or if an error has occurred.
12333 ** False otherwise.
12334 */
12335 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){
12336   assert( pIter!=0 || p->rc!=SQLITE_OK );
12337   assert( p->rc!=SQLITE_OK
12338       || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof
12339   );
12340   return (p->rc || pIter->base.bEof);
12341 }
12342 
12343 /*
12344 ** Return the rowid of the entry that the iterator currently points
12345 ** to. If the iterator points to EOF when this function is called the
12346 ** results are undefined.
12347 */
12348 static i64 fts5MultiIterRowid(Fts5Iter *pIter){
12349   assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf );
12350   return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid;
12351 }
12352 
12353 /*
12354 ** Move the iterator to the next entry at or following iMatch.
12355 */
12356 static void fts5MultiIterNextFrom(
12357   Fts5Index *p,
12358   Fts5Iter *pIter,
12359   i64 iMatch
12360 ){
12361   while( 1 ){
12362     i64 iRowid;
12363     fts5MultiIterNext(p, pIter, 1, iMatch);
12364     if( fts5MultiIterEof(p, pIter) ) break;
12365     iRowid = fts5MultiIterRowid(pIter);
12366     if( pIter->bRev==0 && iRowid>=iMatch ) break;
12367     if( pIter->bRev!=0 && iRowid<=iMatch ) break;
12368   }
12369 }
12370 
12371 /*
12372 ** Return a pointer to a buffer containing the term associated with the
12373 ** entry that the iterator currently points to.
12374 */
12375 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){
12376   Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
12377   *pn = p->term.n;
12378   return p->term.p;
12379 }
12380 
12381 /*
12382 ** Allocate a new segment-id for the structure pStruct. The new segment
12383 ** id must be between 1 and 65335 inclusive, and must not be used by
12384 ** any currently existing segment. If a free segment id cannot be found,
12385 ** SQLITE_FULL is returned.
12386 **
12387 ** If an error has already occurred, this function is a no-op. 0 is
12388 ** returned in this case.
12389 */
12390 static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
12391   int iSegid = 0;
12392 
12393   if( p->rc==SQLITE_OK ){
12394     if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
12395       p->rc = SQLITE_FULL;
12396     }else{
12397       /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
12398       ** array is 63 elements, or 252 bytes, in size.  */
12399       u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
12400       int iLvl, iSeg;
12401       int i;
12402       u32 mask;
12403       memset(aUsed, 0, sizeof(aUsed));
12404       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
12405         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
12406           int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
12407           if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
12408             aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
12409           }
12410         }
12411       }
12412 
12413       for(i=0; aUsed[i]==0xFFFFFFFF; i++);
12414       mask = aUsed[i];
12415       for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
12416       iSegid += 1 + i*32;
12417 
12418 #ifdef SQLITE_DEBUG
12419       for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
12420         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
12421           assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
12422         }
12423       }
12424       assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );
12425 
12426       {
12427         sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
12428         if( p->rc==SQLITE_OK ){
12429           u8 aBlob[2] = {0xff, 0xff};
12430           sqlite3_bind_int(pIdxSelect, 1, iSegid);
12431           sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
12432           assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
12433           p->rc = sqlite3_reset(pIdxSelect);
12434           sqlite3_bind_null(pIdxSelect, 2);
12435         }
12436       }
12437 #endif
12438     }
12439   }
12440 
12441   return iSegid;
12442 }
12443 
12444 /*
12445 ** Discard all data currently cached in the hash-tables.
12446 */
12447 static void fts5IndexDiscardData(Fts5Index *p){
12448   assert( p->pHash || p->nPendingData==0 );
12449   if( p->pHash ){
12450     sqlite3Fts5HashClear(p->pHash);
12451     p->nPendingData = 0;
12452   }
12453 }
12454 
12455 /*
12456 ** Return the size of the prefix, in bytes, that buffer
12457 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld).
12458 **
12459 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater
12460 ** than buffer (pOld/nOld).
12461 */
12462 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){
12463   int i;
12464   for(i=0; i<nOld; i++){
12465     if( pOld[i]!=pNew[i] ) break;
12466   }
12467   return i;
12468 }
12469 
12470 static void fts5WriteDlidxClear(
12471   Fts5Index *p,
12472   Fts5SegWriter *pWriter,
12473   int bFlush                      /* If true, write dlidx to disk */
12474 ){
12475   int i;
12476   assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) );
12477   for(i=0; i<pWriter->nDlidx; i++){
12478     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
12479     if( pDlidx->buf.n==0 ) break;
12480     if( bFlush ){
12481       assert( pDlidx->pgno!=0 );
12482       fts5DataWrite(p,
12483           FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
12484           pDlidx->buf.p, pDlidx->buf.n
12485       );
12486     }
12487     sqlite3Fts5BufferZero(&pDlidx->buf);
12488     pDlidx->bPrevValid = 0;
12489   }
12490 }
12491 
12492 /*
12493 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size.
12494 ** Any new array elements are zeroed before returning.
12495 */
12496 static int fts5WriteDlidxGrow(
12497   Fts5Index *p,
12498   Fts5SegWriter *pWriter,
12499   int nLvl
12500 ){
12501   if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){
12502     Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64(
12503         pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl
12504     );
12505     if( aDlidx==0 ){
12506       p->rc = SQLITE_NOMEM;
12507     }else{
12508       size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx);
12509       memset(&aDlidx[pWriter->nDlidx], 0, nByte);
12510       pWriter->aDlidx = aDlidx;
12511       pWriter->nDlidx = nLvl;
12512     }
12513   }
12514   return p->rc;
12515 }
12516 
12517 /*
12518 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large
12519 ** enough, flush it to disk and return 1. Otherwise discard it and return
12520 ** zero.
12521 */
12522 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
12523   int bFlag = 0;
12524 
12525   /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
12526   ** to the database, also write the doclist-index to disk.  */
12527   if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
12528     bFlag = 1;
12529   }
12530   fts5WriteDlidxClear(p, pWriter, bFlag);
12531   pWriter->nEmpty = 0;
12532   return bFlag;
12533 }
12534 
12535 /*
12536 ** This function is called whenever processing of the doclist for the
12537 ** last term on leaf page (pWriter->iBtPage) is completed.
12538 **
12539 ** The doclist-index for that term is currently stored in-memory within the
12540 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
12541 ** writes it out to disk. Or, if it is too small to bother with, discards
12542 ** it.
12543 **
12544 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
12545 */
12546 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
12547   int bFlag;
12548 
12549   assert( pWriter->iBtPage || pWriter->nEmpty==0 );
12550   if( pWriter->iBtPage==0 ) return;
12551   bFlag = fts5WriteFlushDlidx(p, pWriter);
12552 
12553   if( p->rc==SQLITE_OK ){
12554     const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
12555     /* The following was already done in fts5WriteInit(): */
12556     /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
12557     sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
12558     sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1));
12559     sqlite3_step(p->pIdxWriter);
12560     p->rc = sqlite3_reset(p->pIdxWriter);
12561     sqlite3_bind_null(p->pIdxWriter, 2);
12562   }
12563   pWriter->iBtPage = 0;
12564 }
12565 
12566 /*
12567 ** This is called once for each leaf page except the first that contains
12568 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that
12569 ** is larger than all terms written to earlier leaves, and equal to or
12570 ** smaller than the first term on the new leaf.
12571 **
12572 ** If an error occurs, an error code is left in Fts5Index.rc. If an error
12573 ** has already occurred when this function is called, it is a no-op.
12574 */
12575 static void fts5WriteBtreeTerm(
12576   Fts5Index *p,                   /* FTS5 backend object */
12577   Fts5SegWriter *pWriter,         /* Writer object */
12578   int nTerm, const u8 *pTerm      /* First term on new page */
12579 ){
12580   fts5WriteFlushBtree(p, pWriter);
12581   if( p->rc==SQLITE_OK ){
12582     fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
12583     pWriter->iBtPage = pWriter->writer.pgno;
12584   }
12585 }
12586 
12587 /*
12588 ** This function is called when flushing a leaf page that contains no
12589 ** terms at all to disk.
12590 */
12591 static void fts5WriteBtreeNoTerm(
12592   Fts5Index *p,                   /* FTS5 backend object */
12593   Fts5SegWriter *pWriter          /* Writer object */
12594 ){
12595   /* If there were no rowids on the leaf page either and the doclist-index
12596   ** has already been started, append an 0x00 byte to it.  */
12597   if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
12598     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
12599     assert( pDlidx->bPrevValid );
12600     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
12601   }
12602 
12603   /* Increment the "number of sequential leaves without a term" counter. */
12604   pWriter->nEmpty++;
12605 }
12606 
12607 static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
12608   i64 iRowid;
12609   int iOff;
12610 
12611   iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
12612   fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
12613   return iRowid;
12614 }
12615 
12616 /*
12617 ** Rowid iRowid has just been appended to the current leaf page. It is the
12618 ** first on the page. This function appends an appropriate entry to the current
12619 ** doclist-index.
12620 */
12621 static void fts5WriteDlidxAppend(
12622   Fts5Index *p,
12623   Fts5SegWriter *pWriter,
12624   i64 iRowid
12625 ){
12626   int i;
12627   int bDone = 0;
12628 
12629   for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
12630     i64 iVal;
12631     Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];
12632 
12633     if( pDlidx->buf.n>=p->pConfig->pgsz ){
12634       /* The current doclist-index page is full. Write it to disk and push
12635       ** a copy of iRowid (which will become the first rowid on the next
12636       ** doclist-index leaf page) up into the next level of the b-tree
12637       ** hierarchy. If the node being flushed is currently the root node,
12638       ** also push its first rowid upwards. */
12639       pDlidx->buf.p[0] = 0x01;    /* Not the root node */
12640       fts5DataWrite(p,
12641           FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
12642           pDlidx->buf.p, pDlidx->buf.n
12643       );
12644       fts5WriteDlidxGrow(p, pWriter, i+2);
12645       pDlidx = &pWriter->aDlidx[i];
12646       if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
12647         i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);
12648 
12649         /* This was the root node. Push its first rowid up to the new root. */
12650         pDlidx[1].pgno = pDlidx->pgno;
12651         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
12652         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
12653         sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
12654         pDlidx[1].bPrevValid = 1;
12655         pDlidx[1].iPrev = iFirst;
12656       }
12657 
12658       sqlite3Fts5BufferZero(&pDlidx->buf);
12659       pDlidx->bPrevValid = 0;
12660       pDlidx->pgno++;
12661     }else{
12662       bDone = 1;
12663     }
12664 
12665     if( pDlidx->bPrevValid ){
12666       iVal = iRowid - pDlidx->iPrev;
12667     }else{
12668       i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
12669       assert( pDlidx->buf.n==0 );
12670       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
12671       sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
12672       iVal = iRowid;
12673     }
12674 
12675     sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
12676     pDlidx->bPrevValid = 1;
12677     pDlidx->iPrev = iRowid;
12678   }
12679 }
12680 
12681 static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
12682   static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
12683   Fts5PageWriter *pPage = &pWriter->writer;
12684   i64 iRowid;
12685 
12686   assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );
12687 
12688   /* Set the szLeaf header field. */
12689   assert( 0==fts5GetU16(&pPage->buf.p[2]) );
12690   fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);
12691 
12692   if( pWriter->bFirstTermInPage ){
12693     /* No term was written to this page. */
12694     assert( pPage->pgidx.n==0 );
12695     fts5WriteBtreeNoTerm(p, pWriter);
12696   }else{
12697     /* Append the pgidx to the page buffer. Set the szLeaf header field. */
12698     fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
12699   }
12700 
12701   /* Write the page out to disk */
12702   iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
12703   fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
12704 
12705   /* Initialize the next page. */
12706   fts5BufferZero(&pPage->buf);
12707   fts5BufferZero(&pPage->pgidx);
12708   fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
12709   pPage->iPrevPgidx = 0;
12710   pPage->pgno++;
12711 
12712   /* Increase the leaves written counter */
12713   pWriter->nLeafWritten++;
12714 
12715   /* The new leaf holds no terms or rowids */
12716   pWriter->bFirstTermInPage = 1;
12717   pWriter->bFirstRowidInPage = 1;
12718 }
12719 
12720 /*
12721 ** Append term pTerm/nTerm to the segment being written by the writer passed
12722 ** as the second argument.
12723 **
12724 ** If an error occurs, set the Fts5Index.rc error code. If an error has
12725 ** already occurred, this function is a no-op.
12726 */
12727 static void fts5WriteAppendTerm(
12728   Fts5Index *p,
12729   Fts5SegWriter *pWriter,
12730   int nTerm, const u8 *pTerm
12731 ){
12732   int nPrefix;                    /* Bytes of prefix compression for term */
12733   Fts5PageWriter *pPage = &pWriter->writer;
12734   Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
12735   int nMin = MIN(pPage->term.n, nTerm);
12736 
12737   assert( p->rc==SQLITE_OK );
12738   assert( pPage->buf.n>=4 );
12739   assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );
12740 
12741   /* If the current leaf page is full, flush it to disk. */
12742   if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
12743     if( pPage->buf.n>4 ){
12744       fts5WriteFlushLeaf(p, pWriter);
12745       if( p->rc!=SQLITE_OK ) return;
12746     }
12747     fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
12748   }
12749 
12750   /* TODO1: Updating pgidx here. */
12751   pPgidx->n += sqlite3Fts5PutVarint(
12752       &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
12753   );
12754   pPage->iPrevPgidx = pPage->buf.n;
12755 #if 0
12756   fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
12757   pPgidx->n += 2;
12758 #endif
12759 
12760   if( pWriter->bFirstTermInPage ){
12761     nPrefix = 0;
12762     if( pPage->pgno!=1 ){
12763       /* This is the first term on a leaf that is not the leftmost leaf in
12764       ** the segment b-tree. In this case it is necessary to add a term to
12765       ** the b-tree hierarchy that is (a) larger than the largest term
12766       ** already written to the segment and (b) smaller than or equal to
12767       ** this term. In other words, a prefix of (pTerm/nTerm) that is one
12768       ** byte longer than the longest prefix (pTerm/nTerm) shares with the
12769       ** previous term.
12770       **
12771       ** Usually, the previous term is available in pPage->term. The exception
12772       ** is if this is the first term written in an incremental-merge step.
12773       ** In this case the previous term is not available, so just write a
12774       ** copy of (pTerm/nTerm) into the parent node. This is slightly
12775       ** inefficient, but still correct.  */
12776       int n = nTerm;
12777       if( pPage->term.n ){
12778         n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
12779       }
12780       fts5WriteBtreeTerm(p, pWriter, n, pTerm);
12781       if( p->rc!=SQLITE_OK ) return;
12782       pPage = &pWriter->writer;
12783     }
12784   }else{
12785     nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
12786     fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
12787   }
12788 
12789   /* Append the number of bytes of new data, then the term data itself
12790   ** to the page. */
12791   fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
12792   fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);
12793 
12794   /* Update the Fts5PageWriter.term field. */
12795   fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
12796   pWriter->bFirstTermInPage = 0;
12797 
12798   pWriter->bFirstRowidInPage = 0;
12799   pWriter->bFirstRowidInDoclist = 1;
12800 
12801   assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
12802   pWriter->aDlidx[0].pgno = pPage->pgno;
12803 }
12804 
12805 /*
12806 ** Append a rowid and position-list size field to the writers output.
12807 */
12808 static void fts5WriteAppendRowid(
12809   Fts5Index *p,
12810   Fts5SegWriter *pWriter,
12811   i64 iRowid
12812 ){
12813   if( p->rc==SQLITE_OK ){
12814     Fts5PageWriter *pPage = &pWriter->writer;
12815 
12816     if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
12817       fts5WriteFlushLeaf(p, pWriter);
12818     }
12819 
12820     /* If this is to be the first rowid written to the page, set the
12821     ** rowid-pointer in the page-header. Also append a value to the dlidx
12822     ** buffer, in case a doclist-index is required.  */
12823     if( pWriter->bFirstRowidInPage ){
12824       fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
12825       fts5WriteDlidxAppend(p, pWriter, iRowid);
12826     }
12827 
12828     /* Write the rowid. */
12829     if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
12830       fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
12831     }else{
12832       assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
12833       fts5BufferAppendVarint(&p->rc, &pPage->buf,
12834           (u64)iRowid - (u64)pWriter->iPrevRowid
12835       );
12836     }
12837     pWriter->iPrevRowid = iRowid;
12838     pWriter->bFirstRowidInDoclist = 0;
12839     pWriter->bFirstRowidInPage = 0;
12840   }
12841 }
12842 
12843 static void fts5WriteAppendPoslistData(
12844   Fts5Index *p,
12845   Fts5SegWriter *pWriter,
12846   const u8 *aData,
12847   int nData
12848 ){
12849   Fts5PageWriter *pPage = &pWriter->writer;
12850   const u8 *a = aData;
12851   int n = nData;
12852 
12853   assert( p->pConfig->pgsz>0 );
12854   while( p->rc==SQLITE_OK
12855      && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz
12856   ){
12857     int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
12858     int nCopy = 0;
12859     while( nCopy<nReq ){
12860       i64 dummy;
12861       nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
12862     }
12863     fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
12864     a += nCopy;
12865     n -= nCopy;
12866     fts5WriteFlushLeaf(p, pWriter);
12867   }
12868   if( n>0 ){
12869     fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
12870   }
12871 }
12872 
12873 /*
12874 ** Flush any data cached by the writer object to the database. Free any
12875 ** allocations associated with the writer.
12876 */
12877 static void fts5WriteFinish(
12878   Fts5Index *p,
12879   Fts5SegWriter *pWriter,         /* Writer object */
12880   int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
12881 ){
12882   int i;
12883   Fts5PageWriter *pLeaf = &pWriter->writer;
12884   if( p->rc==SQLITE_OK ){
12885     assert( pLeaf->pgno>=1 );
12886     if( pLeaf->buf.n>4 ){
12887       fts5WriteFlushLeaf(p, pWriter);
12888     }
12889     *pnLeaf = pLeaf->pgno-1;
12890     if( pLeaf->pgno>1 ){
12891       fts5WriteFlushBtree(p, pWriter);
12892     }
12893   }
12894   fts5BufferFree(&pLeaf->term);
12895   fts5BufferFree(&pLeaf->buf);
12896   fts5BufferFree(&pLeaf->pgidx);
12897   fts5BufferFree(&pWriter->btterm);
12898 
12899   for(i=0; i<pWriter->nDlidx; i++){
12900     sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
12901   }
12902   sqlite3_free(pWriter->aDlidx);
12903 }
12904 
12905 static void fts5WriteInit(
12906   Fts5Index *p,
12907   Fts5SegWriter *pWriter,
12908   int iSegid
12909 ){
12910   const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;
12911 
12912   memset(pWriter, 0, sizeof(Fts5SegWriter));
12913   pWriter->iSegid = iSegid;
12914 
12915   fts5WriteDlidxGrow(p, pWriter, 1);
12916   pWriter->writer.pgno = 1;
12917   pWriter->bFirstTermInPage = 1;
12918   pWriter->iBtPage = 1;
12919 
12920   assert( pWriter->writer.buf.n==0 );
12921   assert( pWriter->writer.pgidx.n==0 );
12922 
12923   /* Grow the two buffers to pgsz + padding bytes in size. */
12924   sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
12925   sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);
12926 
12927   if( p->pIdxWriter==0 ){
12928     Fts5Config *pConfig = p->pConfig;
12929     fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
12930           "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)",
12931           pConfig->zDb, pConfig->zName
12932     ));
12933   }
12934 
12935   if( p->rc==SQLITE_OK ){
12936     /* Initialize the 4-byte leaf-page header to 0x00. */
12937     memset(pWriter->writer.buf.p, 0, 4);
12938     pWriter->writer.buf.n = 4;
12939 
12940     /* Bind the current output segment id to the index-writer. This is an
12941     ** optimization over binding the same value over and over as rows are
12942     ** inserted into %_idx by the current writer.  */
12943     sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
12944   }
12945 }
12946 
12947 /*
12948 ** Iterator pIter was used to iterate through the input segments of on an
12949 ** incremental merge operation. This function is called if the incremental
12950 ** merge step has finished but the input has not been completely exhausted.
12951 */
12952 static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
12953   int i;
12954   Fts5Buffer buf;
12955   memset(&buf, 0, sizeof(Fts5Buffer));
12956   for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
12957     Fts5SegIter *pSeg = &pIter->aSeg[i];
12958     if( pSeg->pSeg==0 ){
12959       /* no-op */
12960     }else if( pSeg->pLeaf==0 ){
12961       /* All keys from this input segment have been transfered to the output.
12962       ** Set both the first and last page-numbers to 0 to indicate that the
12963       ** segment is now empty. */
12964       pSeg->pSeg->pgnoLast = 0;
12965       pSeg->pSeg->pgnoFirst = 0;
12966     }else{
12967       int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
12968       i64 iLeafRowid;
12969       Fts5Data *pData;
12970       int iId = pSeg->pSeg->iSegid;
12971       u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};
12972 
12973       iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
12974       pData = fts5LeafRead(p, iLeafRowid);
12975       if( pData ){
12976         if( iOff>pData->szLeaf ){
12977           /* This can occur if the pages that the segments occupy overlap - if
12978           ** a single page has been assigned to more than one segment. In
12979           ** this case a prior iteration of this loop may have corrupted the
12980           ** segment currently being trimmed.  */
12981           p->rc = FTS5_CORRUPT;
12982         }else{
12983           fts5BufferZero(&buf);
12984           fts5BufferGrow(&p->rc, &buf, pData->nn);
12985           fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
12986           fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
12987           fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
12988           fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]);
12989           if( p->rc==SQLITE_OK ){
12990             /* Set the szLeaf field */
12991             fts5PutU16(&buf.p[2], (u16)buf.n);
12992           }
12993 
12994           /* Set up the new page-index array */
12995           fts5BufferAppendVarint(&p->rc, &buf, 4);
12996           if( pSeg->iLeafPgno==pSeg->iTermLeafPgno
12997            && pSeg->iEndofDoclist<pData->szLeaf
12998            && pSeg->iPgidxOff<=pData->nn
12999           ){
13000             int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
13001             fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
13002             fts5BufferAppendBlob(&p->rc, &buf,
13003                 pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
13004             );
13005           }
13006 
13007           pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
13008           fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
13009           fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
13010         }
13011         fts5DataRelease(pData);
13012       }
13013     }
13014   }
13015   fts5BufferFree(&buf);
13016 }
13017 
13018 static void fts5MergeChunkCallback(
13019   Fts5Index *p,
13020   void *pCtx,
13021   const u8 *pChunk, int nChunk
13022 ){
13023   Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
13024   fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
13025 }
13026 
/*
** Merge input segments from level iLvl of the structure *ppStruct into a
** single output segment on level iLvl+1.
**
** If level iLvl already has a merge in progress (pLvl->nMerge!=0), that
** merge is resumed: the inputs are the first nMerge segments of the level
** and output continues in the last segment of level iLvl+1, starting at
** the page following its current pgnoLast. Otherwise a new segment-id is
** allocated, the structure is extended as required (which may update
** *ppStruct), a new segment is appended to level iLvl+1 and all segments
** on level iLvl become inputs.
**
** If pnRem is not NULL, the merge is suspended at the first term boundary
** after more than *pnRem leaf pages have been written, and *pnRem is
** decremented by the number of leaves written before returning. If pnRem
** is NULL the merge always runs to completion.
**
** When the input is exhausted the input segments are removed from both the
** %_data table and the structure. When the merge is only suspended, the
** inputs are trimmed by fts5TrimSegments() and pLvl->nMerge is left set to
** the number of inputs so that a later call can resume.
**
** Errors are reported through p->rc.
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Structure of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5Iter *pIter = 0;       /* Iterator to read input data */
  int nRem = pnRem ? *pnRem : 0;  /* Output leaf pages left to write */
  int nInput;                     /* Number of input segments */
  Fts5SegWriter writer;           /* Writer object */
  Fts5StructureSegment *pSeg;     /* Output segment */
  Fts5Buffer term;
  int bOldest;                    /* True if the output segment is the oldest */
  int eDetail = p->pConfig->eDetail;
  const int flags = FTS5INDEX_QUERY_NOOUTPUT;
  int bTermWritten = 0;           /* True if current term already output */

  assert( iLvl<pStruct->nLevel );
  assert( pLvl->nMerge<=pLvl->nSeg );

  memset(&writer, 0, sizeof(Fts5SegWriter));
  memset(&term, 0, sizeof(Fts5Buffer));
  if( pLvl->nMerge ){
    /* Resume an existing merge. The output segment is the last one on the
    ** output level; keep appending after its current last page. */
    pLvlOut = &pStruct->aLevel[iLvl+1];
    assert( pLvlOut->nSeg>0 );
    nInput = pLvl->nMerge;
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];

    fts5WriteInit(p, &writer, pSeg->iSegid);
    writer.writer.pgno = pSeg->pgnoLast+1;
    writer.iBtPage = 0;
  }else{
    int iSegid = fts5AllocateSegid(p, pStruct);

    /* Extend the Fts5Structure object as required to ensure the output
    ** segment exists. */
    if( iLvl==pStruct->nLevel-1 ){
      fts5StructureAddLevel(&p->rc, ppStruct);
      pStruct = *ppStruct;
    }
    fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0);
    if( p->rc ) return;
    /* pStruct may have changed above, so recompute the level pointers. */
    pLvl = &pStruct->aLevel[iLvl];
    pLvlOut = &pStruct->aLevel[iLvl+1];

    fts5WriteInit(p, &writer, iSegid);

    /* Add the new segment to the output level */
    pSeg = &pLvlOut->aSeg[pLvlOut->nSeg];
    pLvlOut->nSeg++;
    pSeg->pgnoFirst = 1;
    pSeg->iSegid = iSegid;
    pStruct->nSegment++;

    /* Read input from all segments in the input level */
    nInput = pLvl->nSeg;
  }
  /* The output is the oldest segment if it is the only segment on the
  ** last level of the structure. */
  bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2);

  assert( iLvl>=0 );
  for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter);
      fts5MultiIterEof(p, pIter)==0;
      fts5MultiIterNext(p, pIter, 0, 0)
  ){
    Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
    int nPos;                     /* position-list size field value */
    int nTerm;
    const u8 *pTerm;

    pTerm = fts5MultiIterTerm(pIter, &nTerm);
    if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){
      /* The iterator has moved on to a new term. This is the only point at
      ** which an incremental merge may be suspended. */
      if( pnRem && writer.nLeafWritten>nRem ){
        break;
      }
      fts5BufferSet(&p->rc, &term, nTerm, pTerm);
      bTermWritten =0;
    }

    /* Check for key annihilation. An entry with an empty position list is
    ** dropped if the output is the oldest segment, or if the entry does not
    ** have its delete flag set. */
    if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue;

    if( p->rc==SQLITE_OK && bTermWritten==0 ){
      /* This is a new term. Append a term to the output segment. */
      fts5WriteAppendTerm(p, &writer, nTerm, pTerm);
      bTermWritten = 1;
    }

    /* Append the rowid to the output */
    /* WRITEPOSLISTSIZE */
    fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter));

    if( eDetail==FTS5_DETAIL_NONE ){
      /* detail=none: no position data is stored. A delete-flagged entry is
      ** followed by one 0x00 byte, or two if nPos is greater than zero. */
      if( pSegIter->bDel ){
        fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        if( pSegIter->nPos>0 ){
          fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0);
        }
      }
    }else{
      /* Append the position-list data to the output. The size field is
      ** (nPos*2) with the delete flag stored in the least significant bit. */
      nPos = pSegIter->nPos*2 + pSegIter->bDel;
      fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos);
      fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback);
    }
  }

  /* Flush the last leaf page to disk. Set the output segment b-tree height
  ** and last leaf page number at the same time.  */
  fts5WriteFinish(p, &writer, &pSeg->pgnoLast);

  assert( pIter!=0 || p->rc!=SQLITE_OK );
  if( fts5MultiIterEof(p, pIter) ){
    int i;

    /* Remove the redundant segments from the %_data table */
    for(i=0; i<nInput; i++){
      fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid);
    }

    /* Remove the redundant segments from the input level */
    if( pLvl->nSeg!=nInput ){
      int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment);
      memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove);
    }
    pStruct->nSegment -= nInput;
    pLvl->nSeg -= nInput;
    pLvl->nMerge = 0;
    /* If nothing at all was written, discard the empty output segment. */
    if( pSeg->pgnoLast==0 ){
      pLvlOut->nSeg--;
      pStruct->nSegment--;
    }
  }else{
    /* The merge was suspended part way through. Trim the inputs and record
    ** the number of input segments so that the merge can be resumed. */
    assert( pSeg->pgnoLast>0 );
    fts5TrimSegments(p, pIter);
    pLvl->nMerge = nInput;
  }

  fts5MultiIterFree(pIter);
  fts5BufferFree(&term);
  if( pnRem ) *pnRem -= writer.nLeafWritten;
}
13174 
13175 /*
13176 ** Do up to nPg pages of automerge work on the index.
13177 **
13178 ** Return true if any changes were actually made, or false otherwise.
13179 */
13180 static int fts5IndexMerge(
13181   Fts5Index *p,                   /* FTS5 backend object */
13182   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
13183   int nPg,                        /* Pages of work to do */
13184   int nMin                        /* Minimum number of segments to merge */
13185 ){
13186   int nRem = nPg;
13187   int bRet = 0;
13188   Fts5Structure *pStruct = *ppStruct;
13189   while( nRem>0 && p->rc==SQLITE_OK ){
13190     int iLvl;                   /* To iterate through levels */
13191     int iBestLvl = 0;           /* Level offering the most input segments */
13192     int nBest = 0;              /* Number of input segments on best level */
13193 
13194     /* Set iBestLvl to the level to read input segments from. */
13195     assert( pStruct->nLevel>0 );
13196     for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
13197       Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
13198       if( pLvl->nMerge ){
13199         if( pLvl->nMerge>nBest ){
13200           iBestLvl = iLvl;
13201           nBest = pLvl->nMerge;
13202         }
13203         break;
13204       }
13205       if( pLvl->nSeg>nBest ){
13206         nBest = pLvl->nSeg;
13207         iBestLvl = iLvl;
13208       }
13209     }
13210 
13211     /* If nBest is still 0, then the index must be empty. */
13212 #ifdef SQLITE_DEBUG
13213     for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){
13214       assert( pStruct->aLevel[iLvl].nSeg==0 );
13215     }
13216 #endif
13217 
13218     if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){
13219       break;
13220     }
13221     bRet = 1;
13222     fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem);
13223     if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){
13224       fts5StructurePromote(p, iBestLvl+1, pStruct);
13225     }
13226   }
13227   *ppStruct = pStruct;
13228   return bRet;
13229 }
13230 
13231 /*
13232 ** A total of nLeaf leaf pages of data has just been flushed to a level-0
13233 ** segment. This function updates the write-counter accordingly and, if
13234 ** necessary, performs incremental merge work.
13235 **
13236 ** If an error occurs, set the Fts5Index.rc error code. If an error has
13237 ** already occurred, this function is a no-op.
13238 */
13239 static void fts5IndexAutomerge(
13240   Fts5Index *p,                   /* FTS5 backend object */
13241   Fts5Structure **ppStruct,       /* IN/OUT: Current structure of index */
13242   int nLeaf                       /* Number of output leaves just written */
13243 ){
13244   if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){
13245     Fts5Structure *pStruct = *ppStruct;
13246     u64 nWrite;                   /* Initial value of write-counter */
13247     int nWork;                    /* Number of work-quanta to perform */
13248     int nRem;                     /* Number of leaf pages left to write */
13249 
13250     /* Update the write-counter. While doing so, set nWork. */
13251     nWrite = pStruct->nWriteCounter;
13252     nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit));
13253     pStruct->nWriteCounter += nLeaf;
13254     nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel);
13255 
13256     fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge);
13257   }
13258 }
13259 
13260 static void fts5IndexCrisismerge(
13261   Fts5Index *p,                   /* FTS5 backend object */
13262   Fts5Structure **ppStruct        /* IN/OUT: Current structure of index */
13263 ){
13264   const int nCrisis = p->pConfig->nCrisisMerge;
13265   Fts5Structure *pStruct = *ppStruct;
13266   int iLvl = 0;
13267 
13268   assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 );
13269   while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){
13270     fts5IndexMergeLevel(p, &pStruct, iLvl, 0);
13271     assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) );
13272     fts5StructurePromote(p, iLvl+1, pStruct);
13273     iLvl++;
13274   }
13275   *ppStruct = pStruct;
13276 }
13277 
13278 static int fts5IndexReturn(Fts5Index *p){
13279   int rc = p->rc;
13280   p->rc = SQLITE_OK;
13281   return rc;
13282 }
13283 
/*
** Context object pairing an index handle with a segment writer.
** NOTE(review): no use of this type is visible in the surrounding code;
** presumably it is (or was) a callback context for the flush code - confirm
** before relying on it.
*/
typedef struct Fts5FlushCtx Fts5FlushCtx;
struct Fts5FlushCtx {
  Fts5Index *pIdx;                /* Index handle */
  Fts5SegWriter writer;           /* Segment writer object */
};
13289 
13290 /*
13291 ** Buffer aBuf[] contains a list of varints, all small enough to fit
13292 ** in a 32-bit integer. Return the size of the largest prefix of this
13293 ** list nMax bytes or less in size.
13294 */
13295 static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
13296   int ret;
13297   u32 dummy;
13298   ret = fts5GetVarint32(aBuf, dummy);
13299   if( ret<nMax ){
13300     while( 1 ){
13301       int i = fts5GetVarint32(&aBuf[ret], dummy);
13302       if( (ret + i) > nMax ) break;
13303       ret += i;
13304     }
13305   }
13306   return ret;
13307 }
13308 
/*
** Flush the contents of the in-memory hash table p->pHash to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** For each term in the hash table the term is appended to the segment
** writer, followed by its doclist. If the whole doclist fits on the current
** leaf page it is copied in one piece. Otherwise it is copied one rowid and
** position list at a time, flushing leaf pages as they fill up and
** splitting oversized position lists on varint boundaries.
**
** Once the hash table has been written out and cleared, the new segment is
** added to level 0 of the structure, after which automerge and crisis-merge
** work is performed and the structure is written back.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);
  fts5StructureInvalidate(p);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;
    int eDetail = p->pConfig->eDetail;
    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */

      /* Write the term for this entry to disk. */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
      if( p->rc!=SQLITE_OK ) break;

      assert( writer.bFirstRowidInPage==0 );
      if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;           /* Rowid of the current doclist entry */
        u64 iDelta = 0;           /* Delta most recently read from pDoclist */
        int iOff = 0;             /* Current read offset within pDoclist */

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
          iRowid += iDelta;

          if( writer.bFirstRowidInPage ){
            /* First rowid on a new leaf. Store its offset in the first two
            ** bytes of the page and write the rowid itself, not a delta. */
            fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
            if( p->rc!=SQLITE_OK ) break;
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( eDetail==FTS5_DETAIL_NONE ){
            /* detail=none: there is no position list. Copy up to two 0x00
            ** bytes that may follow the rowid (presumably the same marker
            ** bytes that the merge code writes for delete-flagged entries
            ** - confirm against the hash table format). */
            if( iOff<nDoclist && pDoclist[iOff]==0 ){
              pBuf->p[pBuf->n++] = 0;
              iOff++;
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
              }
            }
            if( (pBuf->n + pPgidx->n)>=pgsz ){
              fts5WriteFlushLeaf(p, &writer);
            }
          }else{
            int bDummy;
            int nPos;
            /* nCopy is the size of the poslist-size field plus the size of
            ** the position list itself. */
            int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
            nCopy += nPos;
            if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
              /* The entire poslist will fit on the current leaf. So copy
              ** it in one go. */
              fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
            }else{
              /* The entire poslist will not fit on this leaf. So it needs
              ** to be broken into sections. The only qualification being
              ** that each varint must be stored contiguously.  */
              const u8 *pPoslist = &pDoclist[iOff];
              int iPos = 0;       /* Bytes of pPoslist already written */
              while( p->rc==SQLITE_OK ){
                int nSpace = pgsz - pBuf->n - pPgidx->n;
                int n = 0;
                if( (nCopy - iPos)<=nSpace ){
                  /* The remainder of the poslist fits on this leaf. */
                  n = nCopy - iPos;
                }else{
                  /* Copy as many whole varints as fit in nSpace bytes. At
                  ** least one varint is always copied. */
                  n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                }
                assert( n>0 );
                fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                iPos += n;
                if( (pBuf->n + pPgidx->n)>=pgsz ){
                  fts5WriteFlushLeaf(p, &writer);
                }
                if( iPos>=nCopy ) break;
              }
            }
            iOff += nCopy;
          }
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureWrite() call below.  */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      /* Append the new segment to level 0. */
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  /* pgnoLast is the number of leaves just written (0 if no segment-id
  ** could be allocated above). */
  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}
13466 
13467 /*
13468 ** Flush any data stored in the in-memory hash tables to the database.
13469 */
13470 static void fts5IndexFlush(Fts5Index *p){
13471   /* Unless it is empty, flush the hash table to disk */
13472   if( p->nPendingData ){
13473     assert( p->pHash );
13474     p->nPendingData = 0;
13475     fts5FlushOneHash(p);
13476   }
13477 }
13478 
13479 static Fts5Structure *fts5IndexOptimizeStruct(
13480   Fts5Index *p,
13481   Fts5Structure *pStruct
13482 ){
13483   Fts5Structure *pNew = 0;
13484   sqlite3_int64 nByte = sizeof(Fts5Structure);
13485   int nSeg = pStruct->nSegment;
13486   int i;
13487 
13488   /* Figure out if this structure requires optimization. A structure does
13489   ** not require optimization if either:
13490   **
13491   **  + it consists of fewer than two segments, or
13492   **  + all segments are on the same level, or
13493   **  + all segments except one are currently inputs to a merge operation.
13494   **
13495   ** In the first case, return NULL. In the second, increment the ref-count
13496   ** on *pStruct and return a copy of the pointer to it.
13497   */
13498   if( nSeg<2 ) return 0;
13499   for(i=0; i<pStruct->nLevel; i++){
13500     int nThis = pStruct->aLevel[i].nSeg;
13501     if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){
13502       fts5StructureRef(pStruct);
13503       return pStruct;
13504     }
13505     assert( pStruct->aLevel[i].nMerge<=nThis );
13506   }
13507 
13508   nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel);
13509   pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte);
13510 
13511   if( pNew ){
13512     Fts5StructureLevel *pLvl;
13513     nByte = nSeg * sizeof(Fts5StructureSegment);
13514     pNew->nLevel = pStruct->nLevel+1;
13515     pNew->nRef = 1;
13516     pNew->nWriteCounter = pStruct->nWriteCounter;
13517     pLvl = &pNew->aLevel[pStruct->nLevel];
13518     pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte);
13519     if( pLvl->aSeg ){
13520       int iLvl, iSeg;
13521       int iSegOut = 0;
13522       /* Iterate through all segments, from oldest to newest. Add them to
13523       ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest
13524       ** segment in the data structure.  */
13525       for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){
13526         for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
13527           pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg];
13528           iSegOut++;
13529         }
13530       }
13531       pNew->nSegment = pLvl->nSeg = nSeg;
13532     }else{
13533       sqlite3_free(pNew);
13534       pNew = 0;
13535     }
13536   }
13537 
13538   return pNew;
13539 }
13540 
13541 static int sqlite3Fts5IndexOptimize(Fts5Index *p){
13542   Fts5Structure *pStruct;
13543   Fts5Structure *pNew = 0;
13544 
13545   assert( p->rc==SQLITE_OK );
13546   fts5IndexFlush(p);
13547   pStruct = fts5StructureRead(p);
13548   fts5StructureInvalidate(p);
13549 
13550   if( pStruct ){
13551     pNew = fts5IndexOptimizeStruct(p, pStruct);
13552   }
13553   fts5StructureRelease(pStruct);
13554 
13555   assert( pNew==0 || pNew->nSegment>0 );
13556   if( pNew ){
13557     int iLvl;
13558     for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){}
13559     while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){
13560       int nRem = FTS5_OPT_WORK_UNIT;
13561       fts5IndexMergeLevel(p, &pNew, iLvl, &nRem);
13562     }
13563 
13564     fts5StructureWrite(p, pNew);
13565     fts5StructureRelease(pNew);
13566   }
13567 
13568   return fts5IndexReturn(p);
13569 }
13570 
13571 /*
13572 ** This is called to implement the special "VALUES('merge', $nMerge)"
13573 ** INSERT command.
13574 */
13575 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){
13576   Fts5Structure *pStruct = fts5StructureRead(p);
13577   if( pStruct ){
13578     int nMin = p->pConfig->nUsermerge;
13579     fts5StructureInvalidate(p);
13580     if( nMerge<0 ){
13581       Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct);
13582       fts5StructureRelease(pStruct);
13583       pStruct = pNew;
13584       nMin = 2;
13585       nMerge = nMerge*-1;
13586     }
13587     if( pStruct && pStruct->nLevel ){
13588       if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){
13589         fts5StructureWrite(p, pStruct);
13590       }
13591     }
13592     fts5StructureRelease(pStruct);
13593   }
13594   return fts5IndexReturn(p);
13595 }
13596 
13597 static void fts5AppendRowid(
13598   Fts5Index *p,
13599   u64 iDelta,
13600   Fts5Iter *pUnused,
13601   Fts5Buffer *pBuf
13602 ){
13603   UNUSED_PARAM(pUnused);
13604   fts5BufferAppendVarint(&p->rc, pBuf, iDelta);
13605 }
13606 
13607 static void fts5AppendPoslist(
13608   Fts5Index *p,
13609   u64 iDelta,
13610   Fts5Iter *pMulti,
13611   Fts5Buffer *pBuf
13612 ){
13613   int nData = pMulti->base.nData;
13614   int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING;
13615   assert( nData>0 );
13616   if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){
13617     fts5BufferSafeAppendVarint(pBuf, iDelta);
13618     fts5BufferSafeAppendVarint(pBuf, nData*2);
13619     fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData);
13620     memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING);
13621   }
13622 }
13623 
13624 
13625 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){
13626   u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist;
13627 
13628   assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) );
13629   if( p>=pIter->aEof ){
13630     pIter->aPoslist = 0;
13631   }else{
13632     i64 iDelta;
13633 
13634     p += fts5GetVarint(p, (u64*)&iDelta);
13635     pIter->iRowid += iDelta;
13636 
13637     /* Read position list size */
13638     if( p[0] & 0x80 ){
13639       int nPos;
13640       pIter->nSize = fts5GetVarint32(p, nPos);
13641       pIter->nPoslist = (nPos>>1);
13642     }else{
13643       pIter->nPoslist = ((int)(p[0])) >> 1;
13644       pIter->nSize = 1;
13645     }
13646 
13647     pIter->aPoslist = p;
13648     if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){
13649       pIter->aPoslist = 0;
13650     }
13651   }
13652 }
13653 
13654 static void fts5DoclistIterInit(
13655   Fts5Buffer *pBuf,
13656   Fts5DoclistIter *pIter
13657 ){
13658   memset(pIter, 0, sizeof(*pIter));
13659   if( pBuf->n>0 ){
13660     pIter->aPoslist = pBuf->p;
13661     pIter->aEof = &pBuf->p[pBuf->n];
13662     fts5DoclistIterNext(pIter);
13663   }
13664 }
13665 
/*
** The function below is compiled out. The macro of the same name that
** follows the #endif is the version actually used.
*/
#if 0
/*
** Append a doclist to buffer pBuf.
**
** This function assumes that space within the buffer has already been
** allocated.
*/
static void fts5MergeAppendDocid(
  Fts5Buffer *pBuf,               /* Buffer to write to */
  i64 *piLastRowid,               /* IN/OUT: Previous rowid written (if any) */
  i64 iRowid                      /* Rowid to append */
){
  assert( pBuf->n!=0 || (*piLastRowid)==0 );
  fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid);
  *piLastRowid = iRowid;
}
#endif

/*
** fts5MergeAppendDocid(pBuf, iLastRowid, iRowid):
**
** Append the difference between iRowid and iLastRowid to buffer pBuf as a
** varint, then set iLastRowid to iRowid. The difference is computed using
** unsigned 64-bit arithmetic. As with the disabled function above, space
** within the buffer must already have been allocated.
**
** iLastRowid must be an lvalue. The iLastRowid and iRowid arguments are
** each evaluated more than once, so they must not have side effects. The
** macro expands to a brace-enclosed block, not a single statement.
*/
#define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) {                 \
  assert( (pBuf)->n!=0 || (iLastRowid)==0 );                             \
  fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \
  (iLastRowid) = (iRowid);                                               \
}
13689 
13690 /*
13691 ** Swap the contents of buffer *p1 with that of *p2.
13692 */
13693 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){
13694   Fts5Buffer tmp = *p1;
13695   *p1 = *p2;
13696   *p2 = tmp;
13697 }
13698 
13699 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){
13700   int i = *piOff;
13701   if( i>=pBuf->n ){
13702     *piOff = -1;
13703   }else{
13704     u64 iVal;
13705     *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal);
13706     *piRowid += iVal;
13707   }
13708 }
13709 
13710 /*
13711 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode.
13712 ** In this case the buffers consist of a delta-encoded list of rowids only.
13713 */
13714 static void fts5MergeRowidLists(
13715   Fts5Index *p,                   /* FTS5 backend object */
13716   Fts5Buffer *p1,                 /* First list to merge */
13717   int nBuf,                       /* Number of entries in apBuf[] */
13718   Fts5Buffer *aBuf                /* Array of other lists to merge into p1 */
13719 ){
13720   int i1 = 0;
13721   int i2 = 0;
13722   i64 iRowid1 = 0;
13723   i64 iRowid2 = 0;
13724   i64 iOut = 0;
13725   Fts5Buffer *p2 = &aBuf[0];
13726   Fts5Buffer out;
13727 
13728   (void)nBuf;
13729   memset(&out, 0, sizeof(out));
13730   assert( nBuf==1 );
13731   sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
13732   if( p->rc ) return;
13733 
13734   fts5NextRowid(p1, &i1, &iRowid1);
13735   fts5NextRowid(p2, &i2, &iRowid2);
13736   while( i1>=0 || i2>=0 ){
13737     if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
13738       assert( iOut==0 || iRowid1>iOut );
13739       fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
13740       iOut = iRowid1;
13741       fts5NextRowid(p1, &i1, &iRowid1);
13742     }else{
13743       assert( iOut==0 || iRowid2>iOut );
13744       fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
13745       iOut = iRowid2;
13746       if( i1>=0 && iRowid1==iRowid2 ){
13747         fts5NextRowid(p1, &i1, &iRowid1);
13748       }
13749       fts5NextRowid(p2, &i2, &iRowid2);
13750     }
13751   }
13752 
13753   fts5BufferSwap(&out, p1);
13754   fts5BufferFree(&out);
13755 }
13756 
/*
** State for one input doclist taking part in a merge performed by
** fts5MergePrefixLists(). Objects are linked together using pNext, ordered
** either by rowid or by current position, depending on which of the
** fts5PrefixMergerInsertByXXX() functions was used to build the list.
*/
typedef struct PrefixMerger PrefixMerger;
struct PrefixMerger {
  Fts5DoclistIter iter;           /* Doclist iterator */
  i64 iPos;                       /* For iterating through a position list */
  int iOff;                       /* Current offset within aPos[] */
  u8 *aPos;                       /* Position list of current doclist entry */
  PrefixMerger *pNext;            /* Next in docid/poslist order */
};
13765 
13766 static void fts5PrefixMergerInsertByRowid(
13767   PrefixMerger **ppHead,
13768   PrefixMerger *p
13769 ){
13770   if( p->iter.aPoslist ){
13771     PrefixMerger **pp = ppHead;
13772     while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
13773       pp = &(*pp)->pNext;
13774     }
13775     p->pNext = *pp;
13776     *pp = p;
13777   }
13778 }
13779 
13780 static void fts5PrefixMergerInsertByPosition(
13781   PrefixMerger **ppHead,
13782   PrefixMerger *p
13783 ){
13784   if( p->iPos>=0 ){
13785     PrefixMerger **pp = ppHead;
13786     while( *pp && p->iPos>(*pp)->iPos ){
13787       pp = &(*pp)->pNext;
13788     }
13789     p->pNext = *pp;
13790     *pp = p;
13791   }
13792 }
13793 
13794 
/*
** Array aBuf[] contains nBuf doclists. These are all merged in with the
** doclist in buffer p1.
**
** Each doclist entry consists of a rowid delta, a size field and a position
** list. Entries are written to the output in ascending rowid order. When
** the same rowid is present in more than one input, a single entry is
** written with the position lists merged in position order and duplicate
** positions omitted.
**
** nBuf+1 must be no greater than FTS5_MERGE_NLIST. If every buffer in
** aBuf[] is empty, p1 is left untouched. If the output buffer cannot be
** allocated, the error is left in p->rc and p1 is again left untouched.
** Otherwise the previous contents of p1 are freed and replaced by the
** output buffer. This happens even if the merge loop was abandoned part
** way through because of an error such as FTS5_CORRUPT, in which case
** p->rc is set.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  int nBuf,                       /* Number of buffers in array aBuf[] */
  Fts5Buffer *aBuf                /* Other lists to merge in */
){
/* Advance PrefixMerger p to the next position in its position list. */
#define fts5PrefixMergerNextPosition(p) \
  sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
/* Maximum number of doclists that may be merged, including p1. */
#define FTS5_MERGE_NLIST 16
  PrefixMerger aMerger[FTS5_MERGE_NLIST];
  PrefixMerger *pHead = 0;
  int i;
  int nOut = 0;
  Fts5Buffer out = {0, 0, 0};
  Fts5Buffer tmp = {0, 0, 0};
  i64 iLastRowid = 0;

  /* Initialize a doclist-iterator for each input buffer. Arrange them in
  ** a linked-list starting at pHead in ascending order of rowid. Avoid
  ** linking any iterators already at EOF into the linked list at all. */
  assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) );
  memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
  /* aMerger[nBuf] is used for p1. aMerger[0..nBuf-1] are used for aBuf[]. */
  pHead = &aMerger[nBuf];
  fts5DoclistIterInit(p1, &pHead->iter);
  for(i=0; i<nBuf; i++){
    fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
    fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
    nOut += aBuf[i].n;
  }
  /* There is nothing to merge in, so leave p1 as it is. */
  if( nOut==0 ) return;
  nOut += p1->n + 9 + 10*nBuf;

  /* The maximum size of the output is equal to the sum of the
  ** input sizes + 1 varint (9 bytes). The extra varint is because if the
  ** first rowid in one input is a large negative number, and the first in
  ** the other a non-negative number, the delta for the non-negative
  ** number will be larger on disk than the literal integer value
  ** was.
  **
  ** Or, if the input position-lists are corrupt, then the output might
  ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
  ** (the value PoslistNext64() uses for EOF) as a position and appending
  ** it to the output. This can happen at most once for each input
  ** position-list, hence (nBuf+1) 10 byte paddings.  */
  if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;

  while( pHead ){
    /* Write the rowid at the head of the list. This also sets iLastRowid
    ** to that rowid. */
    fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);

    if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
      /* Merge data from two or more poslists */
      i64 iPrev = 0;
      int nTmp = FTS5_DATA_ZERO_PADDING;
      int nMerge = 0;
      PrefixMerger *pSave = pHead;
      PrefixMerger *pThis = 0;
      int nTail = 0;

      /* Move every merger positioned on the current rowid to a new list
      ** starting at pHead, sorted by first position. pSave is left pointing
      ** at the remainder of the rowid-ordered list. */
      pHead = 0;
      while( pSave && pSave->iter.iRowid==iLastRowid ){
        PrefixMerger *pNext = pSave->pNext;
        pSave->iOff = 0;
        pSave->iPos = 0;
        pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
        fts5PrefixMergerNextPosition(pSave);
        nTmp += pSave->iter.nPoslist + 10;
        nMerge++;
        fts5PrefixMergerInsertByPosition(&pHead, pSave);
        pSave = pNext;
      }

      /* Fewer than two of the position lists yielded a position. */
      if( pHead==0 || pHead->pNext==0 ){
        p->rc = FTS5_CORRUPT;
        break;
      }

      /* See the earlier comment in this function for an explanation of why
      ** corrupt input position lists might cause the output to consume
      ** at most nMerge*10 bytes of unexpected space. */
      if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
        break;
      }
      fts5BufferZero(&tmp);

      /* Write the smallest position, then advance the merger it came from
      ** and put it back in the list. InsertByPosition() does not re-link a
      ** merger whose iPos has become negative. */
      pThis = pHead;
      pHead = pThis->pNext;
      sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
      fts5PrefixMergerNextPosition(pThis);
      fts5PrefixMergerInsertByPosition(&pHead, pThis);

      /* Continue for as long as at least two mergers remain in the list,
      ** skipping any position equal to the one most recently written. */
      while( pHead->pNext ){
        pThis = pHead;
        if( pThis->iPos!=iPrev ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
        }
        fts5PrefixMergerNextPosition(pThis);
        pHead = pThis->pNext;
        fts5PrefixMergerInsertByPosition(&pHead, pThis);
      }

      /* A single merger is left. Write its current position, then copy the
      ** nTail bytes that remain in its position list verbatim below. */
      if( pHead->iPos!=iPrev ){
        sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
      }
      nTail = pHead->iter.nPoslist - pHead->iOff;

      /* WRITEPOSLISTSIZE */
      assert_nc( tmp.n+nTail<=nTmp );
      assert( tmp.n+nTail<=nTmp+nMerge*10 );
      if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
        if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
        break;
      }
      fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
      fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
      if( nTail>0 ){
        fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
      }

      /* Restore the rowid-ordered list. Advance each iterator that was on
      ** the rowid just written and re-insert it in rowid order. */
      pHead = pSave;
      for(i=0; i<nBuf+1; i++){
        PrefixMerger *pX = &aMerger[i];
        if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
          fts5DoclistIterNext(&pX->iter);
          fts5PrefixMergerInsertByRowid(&pHead, pX);
        }
      }

    }else{
      /* Copy poslist from pHead to output */
      PrefixMerger *pThis = pHead;
      Fts5DoclistIter *pI = &pThis->iter;
      fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
      fts5DoclistIterNext(pI);
      pHead = pThis->pNext;
      fts5PrefixMergerInsertByRowid(&pHead, pThis);
    }
  }

  /* Replace the contents of p1 with the output buffer, zeroing the
  ** FTS5_DATA_ZERO_PADDING bytes that follow the end of the data. */
  fts5BufferFree(p1);
  fts5BufferFree(&tmp);
  memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
  *p1 = out;
}
13942 
13943 static void fts5SetupPrefixIter(
13944   Fts5Index *p,                   /* Index to read from */
13945   int bDesc,                      /* True for "ORDER BY rowid DESC" */
13946   int iIdx,                       /* Index to scan for data */
13947   u8 *pToken,                     /* Buffer containing prefix to match */
13948   int nToken,                     /* Size of buffer pToken in bytes */
13949   Fts5Colset *pColset,            /* Restrict matches to these columns */
13950   Fts5Iter **ppIter          /* OUT: New iterator */
13951 ){
13952   Fts5Structure *pStruct;
13953   Fts5Buffer *aBuf;
13954   int nBuf = 32;
13955   int nMerge = 1;
13956 
13957   void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*);
13958   void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*);
13959   if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){
13960     xMerge = fts5MergeRowidLists;
13961     xAppend = fts5AppendRowid;
13962   }else{
13963     nMerge = FTS5_MERGE_NLIST-1;
13964     nBuf = nMerge*8;   /* Sufficient to merge (16^8)==(2^32) lists */
13965     xMerge = fts5MergePrefixLists;
13966     xAppend = fts5AppendPoslist;
13967   }
13968 
13969   aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf);
13970   pStruct = fts5StructureRead(p);
13971 
13972   if( aBuf && pStruct ){
13973     const int flags = FTS5INDEX_QUERY_SCAN
13974                     | FTS5INDEX_QUERY_SKIPEMPTY
13975                     | FTS5INDEX_QUERY_NOOUTPUT;
13976     int i;
13977     i64 iLastRowid = 0;
13978     Fts5Iter *p1 = 0;     /* Iterator used to gather data from index */
13979     Fts5Data *pData;
13980     Fts5Buffer doclist;
13981     int bNewTerm = 1;
13982 
13983     memset(&doclist, 0, sizeof(doclist));
13984     if( iIdx!=0 ){
13985       int dummy = 0;
13986       const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT;
13987       pToken[0] = FTS5_MAIN_PREFIX;
13988       fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1);
13989       fts5IterSetOutputCb(&p->rc, p1);
13990       for(;
13991         fts5MultiIterEof(p, p1)==0;
13992         fts5MultiIterNext2(p, p1, &dummy)
13993       ){
13994         Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
13995         p1->xSetOutputs(p1, pSeg);
13996         if( p1->base.nData ){
13997           xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
13998           iLastRowid = p1->base.iRowid;
13999         }
14000       }
14001       fts5MultiIterFree(p1);
14002     }
14003 
14004     pToken[0] = FTS5_MAIN_PREFIX + iIdx;
14005     fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1);
14006     fts5IterSetOutputCb(&p->rc, p1);
14007     for( /* no-op */ ;
14008         fts5MultiIterEof(p, p1)==0;
14009         fts5MultiIterNext2(p, p1, &bNewTerm)
14010     ){
14011       Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ];
14012       int nTerm = pSeg->term.n;
14013       const u8 *pTerm = pSeg->term.p;
14014       p1->xSetOutputs(p1, pSeg);
14015 
14016       assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 );
14017       if( bNewTerm ){
14018         if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break;
14019       }
14020 
14021       if( p1->base.nData==0 ) continue;
14022 
14023       if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){
14024         for(i=0; p->rc==SQLITE_OK && doclist.n; i++){
14025           int i1 = i*nMerge;
14026           int iStore;
14027           assert( i1+nMerge<=nBuf );
14028           for(iStore=i1; iStore<i1+nMerge; iStore++){
14029             if( aBuf[iStore].n==0 ){
14030               fts5BufferSwap(&doclist, &aBuf[iStore]);
14031               fts5BufferZero(&doclist);
14032               break;
14033             }
14034           }
14035           if( iStore==i1+nMerge ){
14036             xMerge(p, &doclist, nMerge, &aBuf[i1]);
14037             for(iStore=i1; iStore<i1+nMerge; iStore++){
14038               fts5BufferZero(&aBuf[iStore]);
14039             }
14040           }
14041         }
14042         iLastRowid = 0;
14043       }
14044 
14045       xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist);
14046       iLastRowid = p1->base.iRowid;
14047     }
14048 
14049     assert( (nBuf%nMerge)==0 );
14050     for(i=0; i<nBuf; i+=nMerge){
14051       int iFree;
14052       if( p->rc==SQLITE_OK ){
14053         xMerge(p, &doclist, nMerge, &aBuf[i]);
14054       }
14055       for(iFree=i; iFree<i+nMerge; iFree++){
14056         fts5BufferFree(&aBuf[iFree]);
14057       }
14058     }
14059     fts5MultiIterFree(p1);
14060 
14061     pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING);
14062     if( pData ){
14063       pData->p = (u8*)&pData[1];
14064       pData->nn = pData->szLeaf = doclist.n;
14065       if( doclist.n ) memcpy(pData->p, doclist.p, doclist.n);
14066       fts5MultiIterNew2(p, pData, bDesc, ppIter);
14067     }
14068     fts5BufferFree(&doclist);
14069   }
14070 
14071   fts5StructureRelease(pStruct);
14072   sqlite3_free(aBuf);
14073 }
14074 
14075 
14076 /*
14077 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain
14078 ** to the document with rowid iRowid.
14079 */
14080 static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){
14081   assert( p->rc==SQLITE_OK );
14082 
14083   /* Allocate the hash table if it has not already been allocated */
14084   if( p->pHash==0 ){
14085     p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData);
14086   }
14087 
14088   /* Flush the hash table to disk if required */
14089   if( iRowid<p->iWriteRowid
14090    || (iRowid==p->iWriteRowid && p->bDelete==0)
14091    || (p->nPendingData > p->pConfig->nHashSize)
14092   ){
14093     fts5IndexFlush(p);
14094   }
14095 
14096   p->iWriteRowid = iRowid;
14097   p->bDelete = bDelete;
14098   return fts5IndexReturn(p);
14099 }
14100 
14101 /*
14102 ** Commit data to disk.
14103 */
14104 static int sqlite3Fts5IndexSync(Fts5Index *p){
14105   assert( p->rc==SQLITE_OK );
14106   fts5IndexFlush(p);
14107   sqlite3Fts5IndexCloseReader(p);
14108   return fts5IndexReturn(p);
14109 }
14110 
14111 /*
14112 ** Discard any data stored in the in-memory hash tables. Do not write it
14113 ** to the database. Additionally, assume that the contents of the %_data
14114 ** table may have changed on disk. So any in-memory caches of %_data
14115 ** records must be invalidated.
14116 */
14117 static int sqlite3Fts5IndexRollback(Fts5Index *p){
14118   sqlite3Fts5IndexCloseReader(p);
14119   fts5IndexDiscardData(p);
14120   fts5StructureInvalidate(p);
14121   /* assert( p->rc==SQLITE_OK ); */
14122   return SQLITE_OK;
14123 }
14124 
14125 /*
14126 ** The %_data table is completely empty when this function is called. This
14127 ** function populates it with the initial structure objects for each index,
14128 ** and the initial version of the "averages" record (a zero-byte blob).
14129 */
14130 static int sqlite3Fts5IndexReinit(Fts5Index *p){
14131   Fts5Structure s;
14132   fts5StructureInvalidate(p);
14133   fts5IndexDiscardData(p);
14134   memset(&s, 0, sizeof(Fts5Structure));
14135   fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0);
14136   fts5StructureWrite(p, &s);
14137   return fts5IndexReturn(p);
14138 }
14139 
14140 /*
14141 ** Open a new Fts5Index handle. If the bCreate argument is true, create
14142 ** and initialize the underlying %_data table.
14143 **
14144 ** If successful, set *pp to point to the new object and return SQLITE_OK.
14145 ** Otherwise, set *pp to NULL and return an SQLite error code.
14146 */
14147 static int sqlite3Fts5IndexOpen(
14148   Fts5Config *pConfig,
14149   int bCreate,
14150   Fts5Index **pp,
14151   char **pzErr
14152 ){
14153   int rc = SQLITE_OK;
14154   Fts5Index *p;                   /* New object */
14155 
14156   *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index));
14157   if( rc==SQLITE_OK ){
14158     p->pConfig = pConfig;
14159     p->nWorkUnit = FTS5_WORK_UNIT;
14160     p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName);
14161     if( p->zDataTbl && bCreate ){
14162       rc = sqlite3Fts5CreateTable(
14163           pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
14164       );
14165       if( rc==SQLITE_OK ){
14166         rc = sqlite3Fts5CreateTable(pConfig, "idx",
14167             "segid, term, pgno, PRIMARY KEY(segid, term)",
14168             1, pzErr
14169         );
14170       }
14171       if( rc==SQLITE_OK ){
14172         rc = sqlite3Fts5IndexReinit(p);
14173       }
14174     }
14175   }
14176 
14177   assert( rc!=SQLITE_OK || p->rc==SQLITE_OK );
14178   if( rc ){
14179     sqlite3Fts5IndexClose(p);
14180     *pp = 0;
14181   }
14182   return rc;
14183 }
14184 
14185 /*
14186 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen().
14187 */
14188 static int sqlite3Fts5IndexClose(Fts5Index *p){
14189   int rc = SQLITE_OK;
14190   if( p ){
14191     assert( p->pReader==0 );
14192     fts5StructureInvalidate(p);
14193     sqlite3_finalize(p->pWriter);
14194     sqlite3_finalize(p->pDeleter);
14195     sqlite3_finalize(p->pIdxWriter);
14196     sqlite3_finalize(p->pIdxDeleter);
14197     sqlite3_finalize(p->pIdxSelect);
14198     sqlite3_finalize(p->pDataVersion);
14199     sqlite3Fts5HashFree(p->pHash);
14200     sqlite3_free(p->zDataTbl);
14201     sqlite3_free(p);
14202   }
14203   return rc;
14204 }
14205 
/*
** Argument p points to a buffer of utf-8 text that is nByte bytes in
** size. Return the number of bytes occupied by the first nChar characters
** of the buffer, or 0 if the buffer holds fewer than nChar characters.
**
** A byte with value 0xC0 or greater starts a multi-byte character, which
** extends over all directly following bytes of the form 10xxxxxx. If such
** a lead byte is the very last byte of the buffer, 0 is returned.
*/
static int sqlite3Fts5IndexCharlenToBytelen(
  const char *p,
  int nByte,
  int nChar
){
  int iByte = 0;                  /* Current offset into p[] */
  int iChar;                      /* Number of characters consumed so far */

  for(iChar=0; iChar<nChar; iChar++){
    unsigned char c;
    if( iByte>=nByte ) return 0;  /* Input contains fewer than nChar chars */
    c = (unsigned char)p[iByte];
    iByte++;
    if( c<0xc0 ) continue;        /* Single byte character */

    if( iByte>=nByte ) return 0;
    while( (p[iByte] & 0xc0)==0x80 ){
      iByte++;
      if( iByte>=nByte ){
        /* End of buffer. This is only acceptable if the character just
        ** consumed is the last one requested. */
        if( iChar+1==nChar ) break;
        return 0;
      }
    }
  }
  return iByte;
}
14233 
/*
** pIn is a buffer of utf-8 text, nIn bytes in size. Return the number of
** characters it contains. A byte of 0xC0 or greater, together with any
** 10xxxxxx bytes directly following it, counts as one character. Every
** other byte counts as one character by itself.
*/
static int fts5IndexCharlen(const char *pIn, int nIn){
  int nCount = 0;                 /* Characters seen so far */
  int iOff;                       /* Current offset into pIn[] */

  for(iOff=0; iOff<nIn; nCount++){
    const unsigned char c = (unsigned char)pIn[iOff];
    iOff++;
    if( c>=0xc0 ){
      /* Skip the continuation bytes of a multi-byte character */
      for(; iOff<nIn; iOff++){
        if( (pIn[iOff] & 0xc0)!=0x80 ) break;
      }
    }
  }
  return nCount;
}
14249 
14250 /*
14251 ** Insert or remove data to or from the index. Each time a document is
14252 ** added to or removed from the index, this function is called one or more
14253 ** times.
14254 **
14255 ** For an insert, it must be called once for each token in the new document.
14256 ** If the operation is a delete, it must be called (at least) once for each
14257 ** unique token in the document with an iCol value less than zero. The iPos
14258 ** argument is ignored for a delete.
14259 */
14260 static int sqlite3Fts5IndexWrite(
14261   Fts5Index *p,                   /* Index to write to */
14262   int iCol,                       /* Column token appears in (-ve -> delete) */
14263   int iPos,                       /* Position of token within column */
14264   const char *pToken, int nToken  /* Token to add or remove to or from index */
14265 ){
14266   int i;                          /* Used to iterate through indexes */
14267   int rc = SQLITE_OK;             /* Return code */
14268   Fts5Config *pConfig = p->pConfig;
14269 
14270   assert( p->rc==SQLITE_OK );
14271   assert( (iCol<0)==p->bDelete );
14272 
14273   /* Add the entry to the main terms index. */
14274   rc = sqlite3Fts5HashWrite(
14275       p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken
14276   );
14277 
14278   for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){
14279     const int nChar = pConfig->aPrefix[i];
14280     int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
14281     if( nByte ){
14282       rc = sqlite3Fts5HashWrite(p->pHash,
14283           p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken,
14284           nByte
14285       );
14286     }
14287   }
14288 
14289   return rc;
14290 }
14291 
14292 /*
14293 ** Open a new iterator to iterate though all rowid that match the
14294 ** specified token or token prefix.
14295 */
14296 static int sqlite3Fts5IndexQuery(
14297   Fts5Index *p,                   /* FTS index to query */
14298   const char *pToken, int nToken, /* Token (or prefix) to query for */
14299   int flags,                      /* Mask of FTS5INDEX_QUERY_X flags */
14300   Fts5Colset *pColset,            /* Match these columns only */
14301   Fts5IndexIter **ppIter          /* OUT: New iterator object */
14302 ){
14303   Fts5Config *pConfig = p->pConfig;
14304   Fts5Iter *pRet = 0;
14305   Fts5Buffer buf = {0, 0, 0};
14306 
14307   /* If the QUERY_SCAN flag is set, all other flags must be clear. */
14308   assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN );
14309 
14310   if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){
14311     int iIdx = 0;                 /* Index to search */
14312     int iPrefixIdx = 0;           /* +1 prefix index */
14313     if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken);
14314 
14315     /* Figure out which index to search and set iIdx accordingly. If this
14316     ** is a prefix query for which there is no prefix index, set iIdx to
14317     ** greater than pConfig->nPrefix to indicate that the query will be
14318     ** satisfied by scanning multiple terms in the main index.
14319     **
14320     ** If the QUERY_TEST_NOIDX flag was specified, then this must be a
14321     ** prefix-query. Instead of using a prefix-index (if one exists),
14322     ** evaluate the prefix query using the main FTS index. This is used
14323     ** for internal sanity checking by the integrity-check in debug
14324     ** mode only.  */
14325 #ifdef SQLITE_DEBUG
14326     if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){
14327       assert( flags & FTS5INDEX_QUERY_PREFIX );
14328       iIdx = 1+pConfig->nPrefix;
14329     }else
14330 #endif
14331     if( flags & FTS5INDEX_QUERY_PREFIX ){
14332       int nChar = fts5IndexCharlen(pToken, nToken);
14333       for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){
14334         int nIdxChar = pConfig->aPrefix[iIdx-1];
14335         if( nIdxChar==nChar ) break;
14336         if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx;
14337       }
14338     }
14339 
14340     if( iIdx<=pConfig->nPrefix ){
14341       /* Straight index lookup */
14342       Fts5Structure *pStruct = fts5StructureRead(p);
14343       buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx);
14344       if( pStruct ){
14345         fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY,
14346             pColset, buf.p, nToken+1, -1, 0, &pRet
14347         );
14348         fts5StructureRelease(pStruct);
14349       }
14350     }else{
14351       /* Scan multiple terms in the main index */
14352       int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0;
14353       fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet);
14354       if( pRet==0 ){
14355         assert( p->rc!=SQLITE_OK );
14356       }else{
14357         assert( pRet->pColset==0 );
14358         fts5IterSetOutputCb(&p->rc, pRet);
14359         if( p->rc==SQLITE_OK ){
14360           Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst];
14361           if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg);
14362         }
14363       }
14364     }
14365 
14366     if( p->rc ){
14367       sqlite3Fts5IterClose((Fts5IndexIter*)pRet);
14368       pRet = 0;
14369       sqlite3Fts5IndexCloseReader(p);
14370     }
14371 
14372     *ppIter = (Fts5IndexIter*)pRet;
14373     sqlite3Fts5BufferFree(&buf);
14374   }
14375   return fts5IndexReturn(p);
14376 }
14377 
14378 /*
14379 ** Return true if the iterator passed as the only argument is at EOF.
14380 */
14381 /*
14382 ** Move to the next matching rowid.
14383 */
14384 static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){
14385   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14386   assert( pIter->pIndex->rc==SQLITE_OK );
14387   fts5MultiIterNext(pIter->pIndex, pIter, 0, 0);
14388   return fts5IndexReturn(pIter->pIndex);
14389 }
14390 
14391 /*
14392 ** Move to the next matching term/rowid. Used by the fts5vocab module.
14393 */
14394 static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){
14395   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14396   Fts5Index *p = pIter->pIndex;
14397 
14398   assert( pIter->pIndex->rc==SQLITE_OK );
14399 
14400   fts5MultiIterNext(p, pIter, 0, 0);
14401   if( p->rc==SQLITE_OK ){
14402     Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ];
14403     if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){
14404       fts5DataRelease(pSeg->pLeaf);
14405       pSeg->pLeaf = 0;
14406       pIter->base.bEof = 1;
14407     }
14408   }
14409 
14410   return fts5IndexReturn(pIter->pIndex);
14411 }
14412 
14413 /*
14414 ** Move to the next matching rowid that occurs at or after iMatch. The
14415 ** definition of "at or after" depends on whether this iterator iterates
14416 ** in ascending or descending rowid order.
14417 */
14418 static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){
14419   Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14420   fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch);
14421   return fts5IndexReturn(pIter->pIndex);
14422 }
14423 
14424 /*
14425 ** Return the current term.
14426 */
14427 static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){
14428   int n;
14429   const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n);
14430   assert_nc( z || n<=1 );
14431   *pn = n-1;
14432   return (z ? &z[1] : 0);
14433 }
14434 
14435 /*
14436 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery().
14437 */
14438 static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){
14439   if( pIndexIter ){
14440     Fts5Iter *pIter = (Fts5Iter*)pIndexIter;
14441     Fts5Index *pIndex = pIter->pIndex;
14442     fts5MultiIterFree(pIter);
14443     sqlite3Fts5IndexCloseReader(pIndex);
14444   }
14445 }
14446 
14447 /*
14448 ** Read and decode the "averages" record from the database.
14449 **
14450 ** Parameter anSize must point to an array of size nCol, where nCol is
14451 ** the number of user defined columns in the FTS table.
14452 */
14453 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){
14454   int nCol = p->pConfig->nCol;
14455   Fts5Data *pData;
14456 
14457   *pnRow = 0;
14458   memset(anSize, 0, sizeof(i64) * nCol);
14459   pData = fts5DataRead(p, FTS5_AVERAGES_ROWID);
14460   if( p->rc==SQLITE_OK && pData->nn ){
14461     int i = 0;
14462     int iCol;
14463     i += fts5GetVarint(&pData->p[i], (u64*)pnRow);
14464     for(iCol=0; i<pData->nn && iCol<nCol; iCol++){
14465       i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]);
14466     }
14467   }
14468 
14469   fts5DataRelease(pData);
14470   return fts5IndexReturn(p);
14471 }
14472 
14473 /*
14474 ** Replace the current "averages" record with the contents of the buffer
14475 ** supplied as the second argument.
14476 */
14477 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){
14478   assert( p->rc==SQLITE_OK );
14479   fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData);
14480   return fts5IndexReturn(p);
14481 }
14482 
14483 /*
14484 ** Return the total number of blocks this module has read from the %_data
14485 ** table since it was created.
14486 */
14487 static int sqlite3Fts5IndexReads(Fts5Index *p){
14488   return p->nRead;
14489 }
14490 
14491 /*
14492 ** Set the 32-bit cookie value stored at the start of all structure
14493 ** records to the value passed as the second argument.
14494 **
14495 ** Return SQLITE_OK if successful, or an SQLite error code if an error
14496 ** occurs.
14497 */
14498 static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){
14499   int rc;                              /* Return code */
14500   Fts5Config *pConfig = p->pConfig;    /* Configuration object */
14501   u8 aCookie[4];                       /* Binary representation of iNew */
14502   sqlite3_blob *pBlob = 0;
14503 
14504   assert( p->rc==SQLITE_OK );
14505   sqlite3Fts5Put32(aCookie, iNew);
14506 
14507   rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl,
14508       "block", FTS5_STRUCTURE_ROWID, 1, &pBlob
14509   );
14510   if( rc==SQLITE_OK ){
14511     sqlite3_blob_write(pBlob, aCookie, 4, 0);
14512     rc = sqlite3_blob_close(pBlob);
14513   }
14514 
14515   return rc;
14516 }
14517 
14518 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){
14519   Fts5Structure *pStruct;
14520   pStruct = fts5StructureRead(p);
14521   fts5StructureRelease(pStruct);
14522   return fts5IndexReturn(p);
14523 }
14524 
14525 
14526 /*************************************************************************
14527 **************************************************************************
14528 ** Below this point is the implementation of the integrity-check
14529 ** functionality.
14530 */
14531 
14532 /*
14533 ** Return a simple checksum value based on the arguments.
14534 */
14535 static u64 sqlite3Fts5IndexEntryCksum(
14536   i64 iRowid,
14537   int iCol,
14538   int iPos,
14539   int iIdx,
14540   const char *pTerm,
14541   int nTerm
14542 ){
14543   int i;
14544   u64 ret = iRowid;
14545   ret += (ret<<3) + iCol;
14546   ret += (ret<<3) + iPos;
14547   if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx);
14548   for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i];
14549   return ret;
14550 }
14551 
14552 #ifdef SQLITE_DEBUG
14553 /*
14554 ** This function is purely an internal test. It does not contribute to
14555 ** FTS functionality, or even the integrity-check, in any way.
14556 **
14557 ** Instead, it tests that the same set of pgno/rowid combinations are
14558 ** visited regardless of whether the doclist-index identified by parameters
14559 ** iSegid/iLeaf is iterated in forwards or reverse order.
14560 */
14561 static void fts5TestDlidxReverse(
14562   Fts5Index *p,
14563   int iSegid,                     /* Segment id to load from */
14564   int iLeaf                       /* Load doclist-index for this leaf */
14565 ){
14566   Fts5DlidxIter *pDlidx = 0;
14567   u64 cksum1 = 13;
14568   u64 cksum2 = 13;
14569 
14570   for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf);
14571       fts5DlidxIterEof(p, pDlidx)==0;
14572       fts5DlidxIterNext(p, pDlidx)
14573   ){
14574     i64 iRowid = fts5DlidxIterRowid(pDlidx);
14575     int pgno = fts5DlidxIterPgno(pDlidx);
14576     assert( pgno>iLeaf );
14577     cksum1 += iRowid + ((i64)pgno<<32);
14578   }
14579   fts5DlidxIterFree(pDlidx);
14580   pDlidx = 0;
14581 
14582   for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf);
14583       fts5DlidxIterEof(p, pDlidx)==0;
14584       fts5DlidxIterPrev(p, pDlidx)
14585   ){
14586     i64 iRowid = fts5DlidxIterRowid(pDlidx);
14587     int pgno = fts5DlidxIterPgno(pDlidx);
14588     assert( fts5DlidxIterPgno(pDlidx)>iLeaf );
14589     cksum2 += iRowid + ((i64)pgno<<32);
14590   }
14591   fts5DlidxIterFree(pDlidx);
14592   pDlidx = 0;
14593 
14594   if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT;
14595 }
14596 
14597 static int fts5QueryCksum(
14598   Fts5Index *p,                   /* Fts5 index object */
14599   int iIdx,
14600   const char *z,                  /* Index key to query for */
14601   int n,                          /* Size of index key in bytes */
14602   int flags,                      /* Flags for Fts5IndexQuery */
14603   u64 *pCksum                     /* IN/OUT: Checksum value */
14604 ){
14605   int eDetail = p->pConfig->eDetail;
14606   u64 cksum = *pCksum;
14607   Fts5IndexIter *pIter = 0;
14608   int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter);
14609 
14610   while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){
14611     i64 rowid = pIter->iRowid;
14612 
14613     if( eDetail==FTS5_DETAIL_NONE ){
14614       cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n);
14615     }else{
14616       Fts5PoslistReader sReader;
14617       for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader);
14618           sReader.bEof==0;
14619           sqlite3Fts5PoslistReaderNext(&sReader)
14620       ){
14621         int iCol = FTS5_POS2COLUMN(sReader.iPos);
14622         int iOff = FTS5_POS2OFFSET(sReader.iPos);
14623         cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n);
14624       }
14625     }
14626     if( rc==SQLITE_OK ){
14627       rc = sqlite3Fts5IterNext(pIter);
14628     }
14629   }
14630   sqlite3Fts5IterClose(pIter);
14631 
14632   *pCksum = cksum;
14633   return rc;
14634 }
14635 
/*
** Check if buffer z[], size n bytes, contains a series of valid utf-8
** encoded codepoints. If so, return 0. Otherwise, if the buffer does not
** contain valid utf-8, return non-zero.
**
** Only the structure of each sequence is checked: a lead byte of the
** form 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx must be followed by 0, 1,
** 2 or 3 bytes respectively of the form 10xxxxxx, all of which must lie
** within the buffer. Overlong encodings and out-of-range codepoints are
** not detected.
*/
static int fts5TestUtf8(const char *z, int n){
  int i = 0;
  assert_nc( n>0 );
  while( i<n ){
    if( (z[i] & 0x80)==0x00 ){
      i++;
    }else
    if( (z[i] & 0xE0)==0xC0 ){
      if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1;
      i += 2;
    }else
    if( (z[i] & 0xF0)==0xE0 ){
      if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      i += 3;
    }else
    if( (z[i] & 0xF8)==0xF0 ){
      /* A four byte sequence has three continuation bytes. All three must
      ** be checked and all four bytes consumed. Otherwise the final
      ** continuation byte is examined as if it were a lead byte, causing
      ** every valid four byte sequence to be rejected. */
      if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1;
      if( (z[i+3] & 0xC0)!=0x80 ) return 1;
      i += 4;
    }else{
      return 1;
    }
  }

  return 0;
}
14667 
14668 /*
14669 ** This function is also purely an internal test. It does not contribute to
14670 ** FTS functionality, or even the integrity-check, in any way.
14671 */
14672 static void fts5TestTerm(
14673   Fts5Index *p,
14674   Fts5Buffer *pPrev,              /* Previous term */
14675   const char *z, int n,           /* Possibly new term to test */
14676   u64 expected,
14677   u64 *pCksum
14678 ){
14679   int rc = p->rc;
14680   if( pPrev->n==0 ){
14681     fts5BufferSet(&rc, pPrev, n, (const u8*)z);
14682   }else
14683   if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){
14684     u64 cksum3 = *pCksum;
14685     const char *zTerm = (const char*)&pPrev->p[1];  /* term sans prefix-byte */
14686     int nTerm = pPrev->n-1;            /* Size of zTerm in bytes */
14687     int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX);
14688     int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
14689     u64 ck1 = 0;
14690     u64 ck2 = 0;
14691 
14692     /* Check that the results returned for ASC and DESC queries are
14693     ** the same. If not, call this corruption.  */
14694     rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1);
14695     if( rc==SQLITE_OK ){
14696       int f = flags|FTS5INDEX_QUERY_DESC;
14697       rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
14698     }
14699     if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
14700 
14701     /* If this is a prefix query, check that the results returned if the
14702     ** the index is disabled are the same. In both ASC and DESC order.
14703     **
14704     ** This check may only be performed if the hash table is empty. This
14705     ** is because the hash table only supports a single scan query at
14706     ** a time, and the multi-iter loop from which this function is called
14707     ** is already performing such a scan.
14708     **
14709     ** Also only do this if buffer zTerm contains nTerm bytes of valid
14710     ** utf-8. Otherwise, the last part of the buffer contents might contain
14711     ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8
14712     ** character stored in the main fts index, which will cause the
14713     ** test to fail.  */
14714     if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){
14715       if( iIdx>0 && rc==SQLITE_OK ){
14716         int f = flags|FTS5INDEX_QUERY_TEST_NOIDX;
14717         ck2 = 0;
14718         rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
14719         if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
14720       }
14721       if( iIdx>0 && rc==SQLITE_OK ){
14722         int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC;
14723         ck2 = 0;
14724         rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2);
14725         if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT;
14726       }
14727     }
14728 
14729     cksum3 ^= ck1;
14730     fts5BufferSet(&rc, pPrev, n, (const u8*)z);
14731 
14732     if( rc==SQLITE_OK && cksum3!=expected ){
14733       rc = FTS5_CORRUPT;
14734     }
14735     *pCksum = cksum3;
14736   }
14737   p->rc = rc;
14738 }
14739 
14740 #else
14741 # define fts5TestDlidxReverse(x,y,z)
14742 # define fts5TestTerm(u,v,w,x,y,z)
14743 #endif
14744 
14745 /*
14746 ** Check that:
14747 **
14748 **   1) All leaves of pSeg between iFirst and iLast (inclusive) exist and
14749 **      contain zero terms.
14750 **   2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and
14751 **      contain zero rowids.
14752 */
14753 static void fts5IndexIntegrityCheckEmpty(
14754   Fts5Index *p,
14755   Fts5StructureSegment *pSeg,     /* Segment to check internal consistency */
14756   int iFirst,
14757   int iNoRowid,
14758   int iLast
14759 ){
14760   int i;
14761 
14762   /* Now check that the iter.nEmpty leaves following the current leaf
14763   ** (a) exist and (b) contain no terms. */
14764   for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){
14765     Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i));
14766     if( pLeaf ){
14767       if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT;
14768       if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT;
14769     }
14770     fts5DataRelease(pLeaf);
14771   }
14772 }
14773 
14774 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){
14775   int iTermOff = 0;
14776   int ii;
14777 
14778   Fts5Buffer buf1 = {0,0,0};
14779   Fts5Buffer buf2 = {0,0,0};
14780 
14781   ii = pLeaf->szLeaf;
14782   while( ii<pLeaf->nn && p->rc==SQLITE_OK ){
14783     int res;
14784     int iOff;
14785     int nIncr;
14786 
14787     ii += fts5GetVarint32(&pLeaf->p[ii], nIncr);
14788     iTermOff += nIncr;
14789     iOff = iTermOff;
14790 
14791     if( iOff>=pLeaf->szLeaf ){
14792       p->rc = FTS5_CORRUPT;
14793     }else if( iTermOff==nIncr ){
14794       int nByte;
14795       iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
14796       if( (iOff+nByte)>pLeaf->szLeaf ){
14797         p->rc = FTS5_CORRUPT;
14798       }else{
14799         fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
14800       }
14801     }else{
14802       int nKeep, nByte;
14803       iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep);
14804       iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte);
14805       if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){
14806         p->rc = FTS5_CORRUPT;
14807       }else{
14808         buf1.n = nKeep;
14809         fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]);
14810       }
14811 
14812       if( p->rc==SQLITE_OK ){
14813         res = fts5BufferCompare(&buf1, &buf2);
14814         if( res<=0 ) p->rc = FTS5_CORRUPT;
14815       }
14816     }
14817     fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p);
14818   }
14819 
14820   fts5BufferFree(&buf1);
14821   fts5BufferFree(&buf2);
14822 }
14823 
/*
** Check the internal consistency of segment pSeg by comparing the entries
** stored for it in the %_idx table against the segment's leaf pages.
**
** For each %_idx row (visited in term order) that refers to a leaf at or
** after pSeg->pgnoFirst, this function checks that:
**
**   * the leaf contains at least one term, that its first term is not
**     smaller than the term stored in the %_idx row, and that its
**     page-index is well formed (fts5IntegrityCheckPgidx()),
**   * the leaves between the previous %_idx entry and this one are
**     term-less (fts5IndexIntegrityCheckEmpty()), and
**   * if the row carries the doclist-index flag, the pages and rowids
**     reported by the doclist-index iterator match the leaf pages.
**
** Any inconsistency is reported by setting p->rc to FTS5_CORRUPT. The
** function is a no-op for a segment with pgnoFirst==0.
*/
static void fts5IndexIntegrityCheckSegment(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg      /* Segment to check internal consistency */
){
  Fts5Config *pConfig = p->pConfig;
  sqlite3_stmt *pStmt = 0;        /* Iterates over %_idx rows of this segment */
  int rc2;                        /* Result of finalizing pStmt */
  int iIdxPrevLeaf = pSeg->pgnoFirst-1;  /* Leaf of previous %_idx entry */
  int iDlidxPrevLeaf = pSeg->pgnoLast;   /* Passed (+1) as iNoRowid below */

  if( pSeg->pgnoFirst==0 ) return;

  /* Column 2 is the leaf page number and column 3 the doclist-index flag,
  ** both unpacked from the "pgno" column of the %_idx table. */
  fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
      "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d "
      "ORDER BY 1, 2",
      pConfig->zDb, pConfig->zName, pSeg->iSegid
  ));

  /* Iterate through the b-tree hierarchy.  */
  while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
    i64 iRow;                     /* Rowid for this leaf */
    Fts5Data *pLeaf;              /* Data for this leaf */

    const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1);
    int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
    int iIdxLeaf = sqlite3_column_int(pStmt, 2);
    int bIdxDlidx = sqlite3_column_int(pStmt, 3);

    /* If the leaf in question has already been trimmed from the segment,
    ** ignore this b-tree entry. Otherwise, load it into memory. */
    if( iIdxLeaf<pSeg->pgnoFirst ) continue;
    iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf);
    pLeaf = fts5LeafRead(p, iRow);
    if( pLeaf==0 ) break;

    /* Check that the leaf contains at least one term, and that it is equal
    ** to or larger than the split-key in zIdxTerm.  Also check that if there
    ** is also a rowid pointer within the leaf page header, it points to a
    ** location before the term.  */
    if( pLeaf->nn<=pLeaf->szLeaf ){
      /* No page-index follows the first szLeaf bytes: no terms on leaf */
      p->rc = FTS5_CORRUPT;
    }else{
      int iOff;                   /* Offset of first term on leaf */
      int iRowidOff;              /* Offset of first rowid on leaf */
      int nTerm;                  /* Size of term on leaf in bytes */
      int res;                    /* Comparison of term and split-key */

      iOff = fts5LeafFirstTermOff(pLeaf);
      iRowidOff = fts5LeafFirstRowidOff(pLeaf);
      if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){
        p->rc = FTS5_CORRUPT;
      }else{
        /* Compare the common prefix first; on a tie the longer term is
        ** considered larger. */
        iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
        res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
        if( res==0 ) res = nTerm - nIdxTerm;
        if( res<0 ) p->rc = FTS5_CORRUPT;
      }

      fts5IntegrityCheckPgidx(p, pLeaf);
    }
    fts5DataRelease(pLeaf);
    if( p->rc ) break;

    /* Check the leaves that lie between the previous %_idx entry and this
    ** one: all must be term-less, and those from iDlidxPrevLeaf+1 onwards
    ** must also have no first-rowid offset. */
    fts5IndexIntegrityCheckEmpty(
        p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
    );
    if( p->rc ) break;

    /* If there is a doclist-index, check that it looks right. */
    if( bIdxDlidx ){
      Fts5DlidxIter *pDlidx = 0;  /* For iterating through doclist index */
      int iPrevLeaf = iIdxLeaf;
      int iSegid = pSeg->iSegid;
      int iPg = 0;
      i64 iKey;

      for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
          fts5DlidxIterEof(p, pDlidx)==0;
          fts5DlidxIterNext(p, pDlidx)
      ){

        /* Check any rowid-less pages that occur before the current leaf. */
        for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){
          iKey = FTS5_SEGMENT_ROWID(iSegid, iPg);
          pLeaf = fts5DataRead(p, iKey);
          if( pLeaf ){
            if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT;
            fts5DataRelease(pLeaf);
          }
        }
        iPrevLeaf = fts5DlidxIterPgno(pDlidx);

        /* Check that the leaf page indicated by the iterator really does
        ** contain the rowid suggested by the same. */
        iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf);
        pLeaf = fts5DataRead(p, iKey);
        if( pLeaf ){
          i64 iRowid;
          int iRowidOff = fts5LeafFirstRowidOff(pLeaf);
          ASSERT_SZLEAF_OK(pLeaf);
          if( iRowidOff>=pLeaf->szLeaf ){
            p->rc = FTS5_CORRUPT;
          }else{
            fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
            if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT;
          }
          fts5DataRelease(pLeaf);
        }
      }

      /* iPg is whatever the inner page loop above last left it at (0 if
      ** that loop never ran). */
      iDlidxPrevLeaf = iPg;
      fts5DlidxIterFree(pDlidx);
      fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
    }else{
      iDlidxPrevLeaf = pSeg->pgnoLast;
      /* TODO: Check there is no doclist index */
    }

    iIdxPrevLeaf = iIdxLeaf;
  }

  /* Finalize the statement; keep any earlier error in preference to rc2 */
  rc2 = sqlite3_finalize(pStmt);
  if( p->rc==SQLITE_OK ) p->rc = rc2;

  /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
  /* NOTE(review): the block below is compiled out and refers to a variable
  ** ("iter") that is not declared in this function. */
#if 0
  if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
    p->rc = FTS5_CORRUPT;
  }
#endif
}
14957 
14958 
14959 /*
14960 ** Run internal checks to ensure that the FTS index (a) is internally
14961 ** consistent and (b) contains entries for which the XOR of the checksums
14962 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum.
14963 **
14964 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the
14965 ** checksum does not match. Return SQLITE_OK if all checks pass without
14966 ** error, or some other SQLite error code if another error (e.g. OOM)
14967 ** occurs.
14968 */
14969 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){
14970   int eDetail = p->pConfig->eDetail;
14971   u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
14972   Fts5Buffer poslist = {0,0,0};   /* Buffer used to hold a poslist */
14973   Fts5Iter *pIter;                /* Used to iterate through entire index */
14974   Fts5Structure *pStruct;         /* Index structure */
14975   int iLvl, iSeg;
14976 
14977 #ifdef SQLITE_DEBUG
14978   /* Used by extra internal tests only run if NDEBUG is not defined */
14979   u64 cksum3 = 0;                 /* Checksum based on contents of indexes */
14980   Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
14981 #endif
14982   const int flags = FTS5INDEX_QUERY_NOOUTPUT;
14983 
14984   /* Load the FTS index structure */
14985   pStruct = fts5StructureRead(p);
14986   if( pStruct==0 ){
14987     assert( p->rc!=SQLITE_OK );
14988     return fts5IndexReturn(p);
14989   }
14990 
14991   /* Check that the internal nodes of each segment match the leaves */
14992   for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
14993     for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
14994       Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg];
14995       fts5IndexIntegrityCheckSegment(p, pSeg);
14996     }
14997   }
14998 
14999   /* The cksum argument passed to this function is a checksum calculated
15000   ** based on all expected entries in the FTS index (including prefix index
15001   ** entries). This block checks that a checksum calculated based on the
15002   ** actual contents of FTS index is identical.
15003   **
15004   ** Two versions of the same checksum are calculated. The first (stack
15005   ** variable cksum2) based on entries extracted from the full-text index
15006   ** while doing a linear scan of each individual index in turn.
15007   **
15008   ** As each term visited by the linear scans, a separate query for the
15009   ** same term is performed. cksum3 is calculated based on the entries
15010   ** extracted by these queries.
15011   */
15012   for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter);
15013       fts5MultiIterEof(p, pIter)==0;
15014       fts5MultiIterNext(p, pIter, 0, 0)
15015   ){
15016     int n;                      /* Size of term in bytes */
15017     i64 iPos = 0;               /* Position read from poslist */
15018     int iOff = 0;               /* Offset within poslist */
15019     i64 iRowid = fts5MultiIterRowid(pIter);
15020     char *z = (char*)fts5MultiIterTerm(pIter, &n);
15021 
15022     /* If this is a new term, query for it. Update cksum3 with the results. */
15023     fts5TestTerm(p, &term, z, n, cksum2, &cksum3);
15024     if( p->rc ) break;
15025 
15026     if( eDetail==FTS5_DETAIL_NONE ){
15027       if( 0==fts5MultiIterIsEmpty(p, pIter) ){
15028         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n);
15029       }
15030     }else{
15031       poslist.n = 0;
15032       fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist);
15033       fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0");
15034       while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){
15035         int iCol = FTS5_POS2COLUMN(iPos);
15036         int iTokOff = FTS5_POS2OFFSET(iPos);
15037         cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n);
15038       }
15039     }
15040   }
15041   fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3);
15042 
15043   fts5MultiIterFree(pIter);
15044   if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
15045 
15046   fts5StructureRelease(pStruct);
15047 #ifdef SQLITE_DEBUG
15048   fts5BufferFree(&term);
15049 #endif
15050   fts5BufferFree(&poslist);
15051   return fts5IndexReturn(p);
15052 }
15053 
15054 /*************************************************************************
15055 **************************************************************************
15056 ** Below this point is the implementation of the fts5_decode() scalar
15057 ** function only.
15058 */
15059 
15060 #ifdef SQLITE_TEST
15061 /*
15062 ** Decode a segment-data rowid from the %_data table. This function is
15063 ** the opposite of macro FTS5_SEGMENT_ROWID().
15064 */
15065 static void fts5DecodeRowid(
15066   i64 iRowid,                     /* Rowid from %_data table */
15067   int *piSegid,                   /* OUT: Segment id */
15068   int *pbDlidx,                   /* OUT: Dlidx flag */
15069   int *piHeight,                  /* OUT: Height */
15070   int *piPgno                     /* OUT: Page number */
15071 ){
15072   *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1));
15073   iRowid >>= FTS5_DATA_PAGE_B;
15074 
15075   *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1));
15076   iRowid >>= FTS5_DATA_HEIGHT_B;
15077 
15078   *pbDlidx = (int)(iRowid & 0x0001);
15079   iRowid >>= FTS5_DATA_DLI_B;
15080 
15081   *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1));
15082 }
15083 #endif /* SQLITE_TEST */
15084 
15085 #ifdef SQLITE_TEST
15086 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){
15087   int iSegid, iHeight, iPgno, bDlidx;       /* Rowid compenents */
15088   fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno);
15089 
15090   if( iSegid==0 ){
15091     if( iKey==FTS5_AVERAGES_ROWID ){
15092       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} ");
15093     }else{
15094       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}");
15095     }
15096   }
15097   else{
15098     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}",
15099         bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno
15100     );
15101   }
15102 }
15103 #endif /* SQLITE_TEST */
15104 
15105 #ifdef SQLITE_TEST
15106 static void fts5DebugStructure(
15107   int *pRc,                       /* IN/OUT: error code */
15108   Fts5Buffer *pBuf,
15109   Fts5Structure *p
15110 ){
15111   int iLvl, iSeg;                 /* Iterate through levels, segments */
15112 
15113   for(iLvl=0; iLvl<p->nLevel; iLvl++){
15114     Fts5StructureLevel *pLvl = &p->aLevel[iLvl];
15115     sqlite3Fts5BufferAppendPrintf(pRc, pBuf,
15116         " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg
15117     );
15118     for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){
15119       Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
15120       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}",
15121           pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast
15122       );
15123     }
15124     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}");
15125   }
15126 }
15127 #endif /* SQLITE_TEST */
15128 
15129 #ifdef SQLITE_TEST
15130 /*
15131 ** This is part of the fts5_decode() debugging aid.
15132 **
15133 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This
15134 ** function appends a human-readable representation of the same object
15135 ** to the buffer passed as the second argument.
15136 */
15137 static void fts5DecodeStructure(
15138   int *pRc,                       /* IN/OUT: error code */
15139   Fts5Buffer *pBuf,
15140   const u8 *pBlob, int nBlob
15141 ){
15142   int rc;                         /* Return code */
15143   Fts5Structure *p = 0;           /* Decoded structure object */
15144 
15145   rc = fts5StructureDecode(pBlob, nBlob, 0, &p);
15146   if( rc!=SQLITE_OK ){
15147     *pRc = rc;
15148     return;
15149   }
15150 
15151   fts5DebugStructure(pRc, pBuf, p);
15152   fts5StructureRelease(p);
15153 }
15154 #endif /* SQLITE_TEST */
15155 
15156 #ifdef SQLITE_TEST
15157 /*
15158 ** This is part of the fts5_decode() debugging aid.
15159 **
15160 ** Arguments pBlob/nBlob contain an "averages" record. This function
15161 ** appends a human-readable representation of record to the buffer passed
15162 ** as the second argument.
15163 */
15164 static void fts5DecodeAverages(
15165   int *pRc,                       /* IN/OUT: error code */
15166   Fts5Buffer *pBuf,
15167   const u8 *pBlob, int nBlob
15168 ){
15169   int i = 0;
15170   const char *zSpace = "";
15171 
15172   while( i<nBlob ){
15173     u64 iVal;
15174     i += sqlite3Fts5GetVarint(&pBlob[i], &iVal);
15175     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal);
15176     zSpace = " ";
15177   }
15178 }
15179 #endif /* SQLITE_TEST */
15180 
15181 #ifdef SQLITE_TEST
15182 /*
15183 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read
15184 ** each varint and append its string representation to buffer pBuf. Return
15185 ** after either the input buffer is exhausted or a 0 value is read.
15186 **
15187 ** The return value is the number of bytes read from the input buffer.
15188 */
15189 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
15190   int iOff = 0;
15191   while( iOff<n ){
15192     int iVal;
15193     iOff += fts5GetVarint32(&a[iOff], iVal);
15194     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal);
15195   }
15196   return iOff;
15197 }
15198 #endif /* SQLITE_TEST */
15199 
15200 #ifdef SQLITE_TEST
15201 /*
15202 ** The start of buffer (a/n) contains the start of a doclist. The doclist
15203 ** may or may not finish within the buffer. This function appends a text
15204 ** representation of the part of the doclist that is present to buffer
15205 ** pBuf.
15206 **
15207 ** The return value is the number of bytes read from the input buffer.
15208 */
15209 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){
15210   i64 iDocid = 0;
15211   int iOff = 0;
15212 
15213   if( n>0 ){
15214     iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid);
15215     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
15216   }
15217   while( iOff<n ){
15218     int nPos;
15219     int bDel;
15220     iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel);
15221     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":"");
15222     iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos));
15223     if( iOff<n ){
15224       i64 iDelta;
15225       iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta);
15226       iDocid += iDelta;
15227       sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid);
15228     }
15229   }
15230 
15231   return iOff;
15232 }
15233 #endif /* SQLITE_TEST */
15234 
15235 #ifdef SQLITE_TEST
15236 /*
15237 ** This function is part of the fts5_decode() debugging function. It is
15238 ** only ever used with detail=none tables.
15239 **
15240 ** Buffer (pData/nData) contains a doclist in the format used by detail=none
15241 ** tables. This function appends a human-readable version of that list to
15242 ** buffer pBuf.
15243 **
15244 ** If *pRc is other than SQLITE_OK when this function is called, it is a
15245 ** no-op. If an OOM or other error occurs within this function, *pRc is
15246 ** set to an SQLite error code before returning. The final state of buffer
15247 ** pBuf is undefined in this case.
15248 */
15249 static void fts5DecodeRowidList(
15250   int *pRc,                       /* IN/OUT: Error code */
15251   Fts5Buffer *pBuf,               /* Buffer to append text to */
15252   const u8 *pData, int nData      /* Data to decode list-of-rowids from */
15253 ){
15254   int i = 0;
15255   i64 iRowid = 0;
15256 
15257   while( i<nData ){
15258     const char *zApp = "";
15259     u64 iVal;
15260     i += sqlite3Fts5GetVarint(&pData[i], &iVal);
15261     iRowid += iVal;
15262 
15263     if( i<nData && pData[i]==0x00 ){
15264       i++;
15265       if( i<nData && pData[i]==0x00 ){
15266         i++;
15267         zApp = "+";
15268       }else{
15269         zApp = "*";
15270       }
15271     }
15272 
15273     sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp);
15274   }
15275 }
15276 #endif /* SQLITE_TEST */
15277 
15278 #ifdef SQLITE_TEST
15279 /*
15280 ** The implementation of user-defined scalar function fts5_decode().
15281 */
15282 static void fts5DecodeFunction(
15283   sqlite3_context *pCtx,          /* Function call context */
15284   int nArg,                       /* Number of args (always 2) */
15285   sqlite3_value **apVal           /* Function arguments */
15286 ){
15287   i64 iRowid;                     /* Rowid for record being decoded */
15288   int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */
15289   const u8 *aBlob; int n;         /* Record to decode */
15290   u8 *a = 0;
15291   Fts5Buffer s;                   /* Build up text to return here */
15292   int rc = SQLITE_OK;             /* Return code */
15293   sqlite3_int64 nSpace = 0;
15294   int eDetailNone = (sqlite3_user_data(pCtx)!=0);
15295 
15296   assert( nArg==2 );
15297   UNUSED_PARAM(nArg);
15298   memset(&s, 0, sizeof(Fts5Buffer));
15299   iRowid = sqlite3_value_int64(apVal[0]);
15300 
15301   /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[]
15302   ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents
15303   ** buffer overreads even if the record is corrupt.  */
15304   n = sqlite3_value_bytes(apVal[1]);
15305   aBlob = sqlite3_value_blob(apVal[1]);
15306   nSpace = n + FTS5_DATA_ZERO_PADDING;
15307   a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace);
15308   if( a==0 ) goto decode_out;
15309   if( n>0 ) memcpy(a, aBlob, n);
15310 
15311   fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno);
15312 
15313   fts5DebugRowid(&rc, &s, iRowid);
15314   if( bDlidx ){
15315     Fts5Data dlidx;
15316     Fts5DlidxLvl lvl;
15317 
15318     dlidx.p = a;
15319     dlidx.nn = n;
15320 
15321     memset(&lvl, 0, sizeof(Fts5DlidxLvl));
15322     lvl.pData = &dlidx;
15323     lvl.iLeafPgno = iPgno;
15324 
15325     for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){
15326       sqlite3Fts5BufferAppendPrintf(&rc, &s,
15327           " %d(%lld)", lvl.iLeafPgno, lvl.iRowid
15328       );
15329     }
15330   }else if( iSegid==0 ){
15331     if( iRowid==FTS5_AVERAGES_ROWID ){
15332       fts5DecodeAverages(&rc, &s, a, n);
15333     }else{
15334       fts5DecodeStructure(&rc, &s, a, n);
15335     }
15336   }else if( eDetailNone ){
15337     Fts5Buffer term;              /* Current term read from page */
15338     int szLeaf;
15339     int iPgidxOff = szLeaf = fts5GetU16(&a[2]);
15340     int iTermOff;
15341     int nKeep = 0;
15342     int iOff;
15343 
15344     memset(&term, 0, sizeof(Fts5Buffer));
15345 
15346     /* Decode any entries that occur before the first term. */
15347     if( szLeaf<n ){
15348       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff);
15349     }else{
15350       iTermOff = szLeaf;
15351     }
15352     fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4);
15353 
15354     iOff = iTermOff;
15355     while( iOff<szLeaf ){
15356       int nAppend;
15357 
15358       /* Read the term data for the next term*/
15359       iOff += fts5GetVarint32(&a[iOff], nAppend);
15360       term.n = nKeep;
15361       fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]);
15362       sqlite3Fts5BufferAppendPrintf(
15363           &rc, &s, " term=%.*s", term.n, (const char*)term.p
15364       );
15365       iOff += nAppend;
15366 
15367       /* Figure out where the doclist for this term ends */
15368       if( iPgidxOff<n ){
15369         int nIncr;
15370         iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr);
15371         iTermOff += nIncr;
15372       }else{
15373         iTermOff = szLeaf;
15374       }
15375 
15376       fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff);
15377       iOff = iTermOff;
15378       if( iOff<szLeaf ){
15379         iOff += fts5GetVarint32(&a[iOff], nKeep);
15380       }
15381     }
15382 
15383     fts5BufferFree(&term);
15384   }else{
15385     Fts5Buffer term;              /* Current term read from page */
15386     int szLeaf;                   /* Offset of pgidx in a[] */
15387     int iPgidxOff;
15388     int iPgidxPrev = 0;           /* Previous value read from pgidx */
15389     int iTermOff = 0;
15390     int iRowidOff = 0;
15391     int iOff;
15392     int nDoclist;
15393 
15394     memset(&term, 0, sizeof(Fts5Buffer));
15395 
15396     if( n<4 ){
15397       sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt");
15398       goto decode_out;
15399     }else{
15400       iRowidOff = fts5GetU16(&a[0]);
15401       iPgidxOff = szLeaf = fts5GetU16(&a[2]);
15402       if( iPgidxOff<n ){
15403         fts5GetVarint32(&a[iPgidxOff], iTermOff);
15404       }else if( iPgidxOff>n ){
15405         rc = FTS5_CORRUPT;
15406         goto decode_out;
15407       }
15408     }
15409 
15410     /* Decode the position list tail at the start of the page */
15411     if( iRowidOff!=0 ){
15412       iOff = iRowidOff;
15413     }else if( iTermOff!=0 ){
15414       iOff = iTermOff;
15415     }else{
15416       iOff = szLeaf;
15417     }
15418     if( iOff>n ){
15419       rc = FTS5_CORRUPT;
15420       goto decode_out;
15421     }
15422     fts5DecodePoslist(&rc, &s, &a[4], iOff-4);
15423 
15424     /* Decode any more doclist data that appears on the page before the
15425     ** first term. */
15426     nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff;
15427     if( nDoclist+iOff>n ){
15428       rc = FTS5_CORRUPT;
15429       goto decode_out;
15430     }
15431     fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist);
15432 
15433     while( iPgidxOff<n && rc==SQLITE_OK ){
15434       int bFirst = (iPgidxOff==szLeaf);     /* True for first term on page */
15435       int nByte;                            /* Bytes of data */
15436       int iEnd;
15437 
15438       iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte);
15439       iPgidxPrev += nByte;
15440       iOff = iPgidxPrev;
15441 
15442       if( iPgidxOff<n ){
15443         fts5GetVarint32(&a[iPgidxOff], nByte);
15444         iEnd = iPgidxPrev + nByte;
15445       }else{
15446         iEnd = szLeaf;
15447       }
15448       if( iEnd>szLeaf ){
15449         rc = FTS5_CORRUPT;
15450         break;
15451       }
15452 
15453       if( bFirst==0 ){
15454         iOff += fts5GetVarint32(&a[iOff], nByte);
15455         if( nByte>term.n ){
15456           rc = FTS5_CORRUPT;
15457           break;
15458         }
15459         term.n = nByte;
15460       }
15461       iOff += fts5GetVarint32(&a[iOff], nByte);
15462       if( iOff+nByte>n ){
15463         rc = FTS5_CORRUPT;
15464         break;
15465       }
15466       fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]);
15467       iOff += nByte;
15468 
15469       sqlite3Fts5BufferAppendPrintf(
15470           &rc, &s, " term=%.*s", term.n, (const char*)term.p
15471       );
15472       iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff);
15473     }
15474 
15475     fts5BufferFree(&term);
15476   }
15477 
15478  decode_out:
15479   sqlite3_free(a);
15480   if( rc==SQLITE_OK ){
15481     sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT);
15482   }else{
15483     sqlite3_result_error_code(pCtx, rc);
15484   }
15485   fts5BufferFree(&s);
15486 }
15487 #endif /* SQLITE_TEST */
15488 
15489 #ifdef SQLITE_TEST
15490 /*
15491 ** The implementation of user-defined scalar function fts5_rowid().
15492 */
15493 static void fts5RowidFunction(
15494   sqlite3_context *pCtx,          /* Function call context */
15495   int nArg,                       /* Number of args (always 2) */
15496   sqlite3_value **apVal           /* Function arguments */
15497 ){
15498   const char *zArg;
15499   if( nArg==0 ){
15500     sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1);
15501   }else{
15502     zArg = (const char*)sqlite3_value_text(apVal[0]);
15503     if( 0==sqlite3_stricmp(zArg, "segment") ){
15504       i64 iRowid;
15505       int segid, pgno;
15506       if( nArg!=3 ){
15507         sqlite3_result_error(pCtx,
15508             "should be: fts5_rowid('segment', segid, pgno))", -1
15509         );
15510       }else{
15511         segid = sqlite3_value_int(apVal[1]);
15512         pgno = sqlite3_value_int(apVal[2]);
15513         iRowid = FTS5_SEGMENT_ROWID(segid, pgno);
15514         sqlite3_result_int64(pCtx, iRowid);
15515       }
15516     }else{
15517       sqlite3_result_error(pCtx,
15518         "first arg to fts5_rowid() must be 'segment'" , -1
15519       );
15520     }
15521   }
15522 }
15523 #endif /* SQLITE_TEST */
15524 
15525 /*
15526 ** This is called as part of registering the FTS5 module with database
15527 ** connection db. It registers several user-defined scalar functions useful
15528 ** with FTS5.
15529 **
15530 ** If successful, SQLITE_OK is returned. If an error occurs, some other
15531 ** SQLite error code is returned instead.
15532 */
15533 static int sqlite3Fts5IndexInit(sqlite3 *db){
15534 #ifdef SQLITE_TEST
15535   int rc = sqlite3_create_function(
15536       db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0
15537   );
15538 
15539   if( rc==SQLITE_OK ){
15540     rc = sqlite3_create_function(
15541         db, "fts5_decode_none", 2,
15542         SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0
15543     );
15544   }
15545 
15546   if( rc==SQLITE_OK ){
15547     rc = sqlite3_create_function(
15548         db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0
15549     );
15550   }
15551   return rc;
15552 #else
15553   return SQLITE_OK;
15554   UNUSED_PARAM(db);
15555 #endif
15556 }
15557 
15558 
15559 static int sqlite3Fts5IndexReset(Fts5Index *p){
15560   assert( p->pStruct==0 || p->iStructVersion!=0 );
15561   if( fts5IndexDataVersion(p)!=p->iStructVersion ){
15562     fts5StructureInvalidate(p);
15563   }
15564   return fts5IndexReturn(p);
15565 }
15566 
15567 #line 1 "fts5_main.c"
15568 /*
15569 ** 2014 Jun 09
15570 **
15571 ** The author disclaims copyright to this source code.  In place of
15572 ** a legal notice, here is a blessing:
15573 **
15574 **    May you do good and not evil.
15575 **    May you find forgiveness for yourself and forgive others.
15576 **    May you share freely, never taking more than you give.
15577 **
15578 ******************************************************************************
15579 **
15580 ** This is an SQLite module implementing full-text search.
15581 */
15582 
15583 
15584 /* #include "fts5Int.h" */
15585 
15586 /*
15587 ** This variable is set to false when running tests for which the on disk
15588 ** structures should not be corrupt. Otherwise, true. If it is false, extra
15589 ** assert() conditions in the fts5 code are activated - conditions that are
15590 ** only true if it is guaranteed that the fts5 database is not corrupt.
15591 */
15592 #ifdef SQLITE_DEBUG
15593 int sqlite3_fts5_may_be_corrupt = 1;
15594 #endif
15595 
15596 
/* Forward declarations of structure types used by this module. */
typedef struct Fts5Auxdata Fts5Auxdata;
typedef struct Fts5Auxiliary Fts5Auxiliary;
typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5FullTable Fts5FullTable;
typedef struct Fts5Sorter Fts5Sorter;
typedef struct Fts5TokenizerModule Fts5TokenizerModule;
15603 
15604 /*
15605 ** NOTES ON TRANSACTIONS:
15606 **
15607 ** SQLite invokes the following virtual table methods as transactions are
15608 ** opened and closed by the user:
15609 **
15610 **     xBegin():    Start of a new transaction.
15611 **     xSync():     Initial part of two-phase commit.
15612 **     xCommit():   Final part of two-phase commit.
15613 **     xRollback(): Rollback the transaction.
15614 **
15615 ** Anything that is required as part of a commit that may fail is performed
15616 ** in the xSync() callback. Current versions of SQLite ignore any errors
15617 ** returned by xCommit().
15618 **
15619 ** And as sub-transactions are opened/closed:
15620 **
15621 **     xSavepoint(int S):  Open savepoint S.
15622 **     xRelease(int S):    Commit and close savepoint S.
15623 **     xRollbackTo(int S): Rollback to start of savepoint S.
15624 **
15625 ** During a write-transaction the fts5_index.c module may cache some data
15626 ** in-memory. It is flushed to disk whenever xSync(), xRelease() or
15627 ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo()
15628 ** is called.
15629 **
15630 ** Additionally, if SQLITE_DEBUG is defined, an instance of the following
15631 ** structure is used to record the current transaction state. This information
15632 ** is not required, but it is used in the assert() statements executed by
15633 ** function fts5CheckTransactionState() (see below).
15634 */
/* Record of the current transaction state, as described in the notes
** above. It is informational only: used by assert() statements when
** SQLITE_DEBUG is defined. */
struct Fts5TransactionState {
  int eState;                     /* 0==closed, 1==open, 2==synced */
  int iSavepoint;                 /* Number of open savepoints (0 -> none) */
};
15639 
15640 /*
15641 ** A single object of this type is allocated when the FTS5 module is
15642 ** registered with a database handle. It is used to store pointers to
15643 ** all registered FTS5 extensions - tokenizers and auxiliary functions.
15644 */
struct Fts5Global {
  fts5_api api;                   /* User visible part of object (see fts5.h) */
  sqlite3 *db;                    /* Associated database connection */
  i64 iNextId;                    /* Used to allocate unique cursor ids. It is
                                  ** pre-incremented by fts5OpenMethod() for
                                  ** each new cursor. */
  Fts5Auxiliary *pAux;            /* First in list of all aux. functions */
  Fts5TokenizerModule *pTok;      /* First in list of all tokenizer modules */
  Fts5TokenizerModule *pDfltTok;  /* Default tokenizer module */
  Fts5Cursor *pCsr;               /* First in list of all open cursors. The
                                  ** list is linked through Fts5Cursor.pNext
                                  ** and holds cursors of every FTS5 table
                                  ** that shares this object. */
};
15654 
15655 /*
15656 ** Each auxiliary function registered with the FTS5 module is represented
15657 ** by an object of the following type. All such objects are stored as part
15658 ** of the Fts5Global.pAux list.
15659 */
struct Fts5Auxiliary {
  Fts5Global *pGlobal;            /* Global context for this function */
  char *zFunc;                    /* Function name (nul-terminated) */
  void *pUserData;                /* User-data pointer */
  fts5_extension_function xFunc;  /* Callback function */
  void (*xDestroy)(void*);        /* Destructor function (presumably invoked
                                  ** on pUserData - confirm at call site) */
  Fts5Auxiliary *pNext;           /* Next registered auxiliary function in
                                  ** the Fts5Global.pAux list */
};
15668 
15669 /*
15670 ** Each tokenizer module registered with the FTS5 module is represented
15671 ** by an object of the following type. All such objects are stored as part
15672 ** of the Fts5Global.pTok list.
15673 */
struct Fts5TokenizerModule {
  char *zName;                    /* Name of tokenizer */
  void *pUserData;                /* User pointer passed to xCreate() */
  fts5_tokenizer x;               /* Tokenizer functions */
  void (*xDestroy)(void*);        /* Destructor function (presumably invoked
                                  ** on pUserData - confirm at call site) */
  Fts5TokenizerModule *pNext;     /* Next registered tokenizer module in
                                  ** the Fts5Global.pTok list */
};
15681 
/*
** The complete virtual table object used by this file. The Fts5Table
** member must remain the first field: the methods below cast the same
** sqlite3_vtab pointer to both (Fts5Table*) and (Fts5FullTable*).
*/
struct Fts5FullTable {
  Fts5Table p;                    /* Public class members from fts5Int.h */
  Fts5Storage *pStorage;          /* Document store */
  Fts5Global *pGlobal;            /* Global (connection wide) data */
  Fts5Cursor *pSortCsr;           /* Sort data from this cursor */
#ifdef SQLITE_DEBUG
  struct Fts5TransactionState ts; /* Debug-only transaction state, checked
                                  ** by fts5CheckTransactionState() */
#endif
};
15691 
/*
** A position-list pointer paired with the number of terms in the phrase
** it belongs to.
*/
struct Fts5MatchPhrase {
  Fts5Buffer *pPoslist;           /* Pointer to current poslist */
  int nTerm;                      /* Size of phrase in terms */
};
15696 
15697 /*
15698 ** pStmt:
15699 **   SELECT rowid, <fts> FROM <fts> ORDER BY +rank;
15700 **
15701 ** aIdx[]:
15702 **   There is one entry in the aIdx[] array for each phrase in the query,
15703 **   the value of which is the offset within aPoslist[] following the last
15704 **   byte of the position list for the corresponding phrase.
15705 */
struct Fts5Sorter {
  sqlite3_stmt *pStmt;            /* Statement stepped by fts5SorterNext() */
  i64 iRowid;                     /* Current rowid (column 0 of pStmt) */
  const u8 *aPoslist;             /* Position lists for current row */
  int nIdx;                       /* Number of entries in aIdx[] */
  int aIdx[1];                    /* Offsets into aPoslist for current row.
                                  ** Declared with one entry but indexed up
                                  ** to nIdx-1, so it must stay the last
                                  ** field (presumably over-allocated by the
                                  ** code that creates the sorter). */
};
15713 
15714 
15715 /*
15716 ** Virtual-table cursor object.
15717 **
15718 ** iSpecial:
15719 **   If this is a 'special' query (refer to function fts5SpecialMatch()),
15720 **   then this variable contains the result of the query.
15721 **
15722 ** iFirstRowid, iLastRowid:
15723 **   These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the
15724 **   cursor iterates in ascending order of rowids, iFirstRowid is the lower
15725 **   limit of rowids to return, and iLastRowid the upper. In other words, the
15726 **   WHERE clause in the user's query might have been:
15727 **
15728 **       <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid
15729 **
15730 **   If the cursor iterates in descending order of rowid, iFirstRowid
15731 **   is the upper limit (i.e. the "first" rowid visited) and iLastRowid
15732 **   the lower.
15733 */
struct Fts5Cursor {
  sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
  Fts5Cursor *pNext;              /* Next cursor in Fts5Global.pCsr list */
  int *aColumnSize;               /* Values for xColumnSize(). Points at the
                                  ** array of nCol ints that fts5OpenMethod()
                                  ** allocates directly after this struct. */
  i64 iCsrId;                     /* Cursor id */

  /* Zero from this point onwards on cursor reset. fts5FreeCursorComponents()
  ** does this with a single memset() that starts at ePlan and runs to the
  ** end of the structure, so any field that must survive a reset has to be
  ** declared above this line. */
  int ePlan;                      /* FTS5_PLAN_XXX value */
  int bDesc;                      /* True for "ORDER BY rowid DESC" queries */
  i64 iFirstRowid;                /* Return no rowids earlier than this */
  i64 iLastRowid;                 /* Return no rowids later than this */
  sqlite3_stmt *pStmt;            /* Statement used to read %_content */
  Fts5Expr *pExpr;                /* Expression for MATCH queries */
  Fts5Sorter *pSorter;            /* Sorter for "ORDER BY rank" queries */
  int csrflags;                   /* Mask of cursor flags (see below) */
  i64 iSpecial;                   /* Result of special query */

  /* "rank" function. Populated on demand from vtab.xColumn(). */
  char *zRank;                    /* Custom rank function */
  char *zRankArgs;                /* Custom rank function args */
  Fts5Auxiliary *pRank;           /* Rank callback (or NULL) */
  int nRankArg;                   /* Number of trailing arguments for rank() */
  sqlite3_value **apRankArg;      /* Array of trailing arguments */
  sqlite3_stmt *pRankArgStmt;     /* Origin of objects in apRankArg[] */

  /* Auxiliary data storage */
  Fts5Auxiliary *pAux;            /* Currently executing extension function */
  Fts5Auxdata *pAuxdata;          /* First in linked list of saved aux-data */

  /* Cache used by auxiliary functions xInst() and xInstCount() */
  Fts5PoslistReader *aInstIter;   /* One for each phrase */
  int nInstAlloc;                 /* Size of aInst[] array (entries / 3) */
  int nInstCount;                 /* Number of phrase instances */
  int *aInst;                     /* 3 integers per phrase instance */
};
15769 
15770 /*
15771 ** Bits that make up the "idxNum" parameter passed indirectly by
15772 ** xBestIndex() to xFilter().
15773 */
#define FTS5_BI_MATCH        0x0001         /* <tbl> MATCH ? */
#define FTS5_BI_RANK         0x0002         /* rank MATCH ? */
#define FTS5_BI_ROWID_EQ     0x0004         /* rowid == ? */
#define FTS5_BI_ROWID_LE     0x0008         /* rowid <= ? */
#define FTS5_BI_ROWID_GE     0x0010         /* rowid >= ? */

/* ORDER BY flags. These are the bits that fts5BestIndexMethod() stores in
** sqlite3_index_info.idxNum. */
#define FTS5_BI_ORDER_RANK   0x0020         /* ORDER BY rank */
#define FTS5_BI_ORDER_ROWID  0x0040         /* ORDER BY rowid */
#define FTS5_BI_ORDER_DESC   0x0080         /* The ORDER BY term is DESC */
15783 
15784 /*
15785 ** Values for Fts5Cursor.csrflags
15786 */
#define FTS5CSR_EOF               0x01   /* Cursor has no more rows */
#define FTS5CSR_REQUIRE_CONTENT   0x02   /* Set by fts5CsrNewrow() */
#define FTS5CSR_REQUIRE_DOCSIZE   0x04   /* Set by fts5CsrNewrow() */
#define FTS5CSR_REQUIRE_INST      0x08   /* Set by fts5CsrNewrow() */
#define FTS5CSR_FREE_ZRANK        0x10   /* zRank/zRankArgs are freed by
                                         ** fts5FreeCursorComponents() */
#define FTS5CSR_REQUIRE_RESEEK    0x20   /* Set by fts5TripCursors(), cleared
                                         ** by fts5CursorReseek() */
#define FTS5CSR_REQUIRE_POSLIST   0x40   /* Set by fts5CsrNewrow() */

/* BitFlagAllTest() is true if every bit of y is set in x. BitFlagTest() is
** true if at least one bit of y is set in x. */
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
#define BitFlagTest(x,y)    (((x) & (y))!=0)


/*
** Macros to Set(), Clear() and Test() cursor flags. CsrFlagTest() evaluates
** to the masked bits themselves (not a normalized 0 or 1).
*/
#define CsrFlagSet(pCsr, flag)   ((pCsr)->csrflags |= (flag))
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag)  ((pCsr)->csrflags & (flag))
15805 
/*
** One entry in the Fts5Cursor.pAuxdata list. fts5FreeCursorComponents()
** walks the list, calls xDelete(pPtr) for each entry that has a non-NULL
** xDelete, and then frees the entry itself.
*/
struct Fts5Auxdata {
  Fts5Auxiliary *pAux;            /* Extension to which this belongs */
  void *pPtr;                     /* Pointer value */
  void(*xDelete)(void*);          /* Destructor */
  Fts5Auxdata *pNext;             /* Next object in linked list */
};
15812 
#ifdef SQLITE_DEBUG
/*
** Operation codes accepted by fts5CheckTransactionState().
*/
#define FTS5_BEGIN      1
#define FTS5_SYNC       2
#define FTS5_COMMIT     3
#define FTS5_ROLLBACK   4
#define FTS5_SAVEPOINT  5
#define FTS5_RELEASE    6
#define FTS5_ROLLBACKTO 7

/*
** Debug-only bookkeeping. Assert that operation op (one of the FTS5_XXX
** codes above) is legal given the transaction state recorded in p->ts,
** then update p->ts to reflect the operation. Parameter iSavepoint is
** only meaningful for the three savepoint operations.
**
** When SQLITE_DEBUG is not defined this is a macro that expands to nothing.
*/
static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){
  struct Fts5TransactionState *pTs = &p->ts;

  if( op==FTS5_BEGIN ){
    assert( pTs->eState==0 );
    pTs->eState = 1;
    pTs->iSavepoint = -1;
  }else if( op==FTS5_SYNC ){
    assert( pTs->eState==1 );
    pTs->eState = 2;
  }else if( op==FTS5_COMMIT ){
    assert( pTs->eState==2 );
    pTs->eState = 0;
  }else if( op==FTS5_ROLLBACK ){
    assert( pTs->eState==1 || pTs->eState==2 || pTs->eState==0 );
    pTs->eState = 0;
  }else if( op==FTS5_SAVEPOINT ){
    assert( pTs->eState==1 );
    assert( iSavepoint>=0 );
    assert( iSavepoint>=pTs->iSavepoint );
    pTs->iSavepoint = iSavepoint;
  }else if( op==FTS5_RELEASE ){
    assert( pTs->eState==1 );
    assert( iSavepoint>=0 );
    assert( iSavepoint<=pTs->iSavepoint );
    pTs->iSavepoint = iSavepoint-1;
  }else if( op==FTS5_ROLLBACKTO ){
    assert( pTs->eState==1 );
    assert( iSavepoint>=-1 );
    /* It is deliberately not asserted that iSavepoint<=pTs->iSavepoint.
    ** That condition can fail if another vtab strikes an error within an
    ** xSavepoint() call and SQLite then calls xRollbackTo() - without
    ** having called xSavepoint() on this vtab.  */
    pTs->iSavepoint = iSavepoint;
  }
}
#else
# define fts5CheckTransactionState(x,y,z)
#endif
15872 
15873 /*
15874 ** Return true if pTab is a contentless table.
15875 */
15876 static int fts5IsContentless(Fts5FullTable *pTab){
15877   return pTab->p.pConfig->eContent==FTS5_CONTENT_NONE;
15878 }
15879 
15880 /*
15881 ** Delete a virtual table handle allocated by fts5InitVtab().
15882 */
15883 static void fts5FreeVtab(Fts5FullTable *pTab){
15884   if( pTab ){
15885     sqlite3Fts5IndexClose(pTab->p.pIndex);
15886     sqlite3Fts5StorageClose(pTab->pStorage);
15887     sqlite3Fts5ConfigFree(pTab->p.pConfig);
15888     sqlite3_free(pTab);
15889   }
15890 }
15891 
15892 /*
15893 ** The xDisconnect() virtual table method.
15894 */
15895 static int fts5DisconnectMethod(sqlite3_vtab *pVtab){
15896   fts5FreeVtab((Fts5FullTable*)pVtab);
15897   return SQLITE_OK;
15898 }
15899 
15900 /*
15901 ** The xDestroy() virtual table method.
15902 */
15903 static int fts5DestroyMethod(sqlite3_vtab *pVtab){
15904   Fts5Table *pTab = (Fts5Table*)pVtab;
15905   int rc = sqlite3Fts5DropAll(pTab->pConfig);
15906   if( rc==SQLITE_OK ){
15907     fts5FreeVtab((Fts5FullTable*)pVtab);
15908   }
15909   return rc;
15910 }
15911 
15912 /*
15913 ** This function is the implementation of both the xConnect and xCreate
15914 ** methods of the FTS3 virtual table.
15915 **
15916 ** The argv[] array contains the following:
15917 **
15918 **   argv[0]   -> module name  ("fts5")
15919 **   argv[1]   -> database name
15920 **   argv[2]   -> table name
15921 **   argv[...] -> "column name" and other module argument fields.
15922 */
15923 static int fts5InitVtab(
15924   int bCreate,                    /* True for xCreate, false for xConnect */
15925   sqlite3 *db,                    /* The SQLite database connection */
15926   void *pAux,                     /* Hash table containing tokenizers */
15927   int argc,                       /* Number of elements in argv array */
15928   const char * const *argv,       /* xCreate/xConnect argument array */
15929   sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
15930   char **pzErr                    /* Write any error message here */
15931 ){
15932   Fts5Global *pGlobal = (Fts5Global*)pAux;
15933   const char **azConfig = (const char**)argv;
15934   int rc = SQLITE_OK;             /* Return code */
15935   Fts5Config *pConfig = 0;        /* Results of parsing argc/argv */
15936   Fts5FullTable *pTab = 0;        /* New virtual table object */
15937 
15938   /* Allocate the new vtab object and parse the configuration */
15939   pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable));
15940   if( rc==SQLITE_OK ){
15941     rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr);
15942     assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 );
15943   }
15944   if( rc==SQLITE_OK ){
15945     pTab->p.pConfig = pConfig;
15946     pTab->pGlobal = pGlobal;
15947   }
15948 
15949   /* Open the index sub-system */
15950   if( rc==SQLITE_OK ){
15951     rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr);
15952   }
15953 
15954   /* Open the storage sub-system */
15955   if( rc==SQLITE_OK ){
15956     rc = sqlite3Fts5StorageOpen(
15957         pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr
15958     );
15959   }
15960 
15961   /* Call sqlite3_declare_vtab() */
15962   if( rc==SQLITE_OK ){
15963     rc = sqlite3Fts5ConfigDeclareVtab(pConfig);
15964   }
15965 
15966   /* Load the initial configuration */
15967   if( rc==SQLITE_OK ){
15968     assert( pConfig->pzErrmsg==0 );
15969     pConfig->pzErrmsg = pzErr;
15970     rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
15971     sqlite3Fts5IndexRollback(pTab->p.pIndex);
15972     pConfig->pzErrmsg = 0;
15973   }
15974 
15975   if( rc!=SQLITE_OK ){
15976     fts5FreeVtab(pTab);
15977     pTab = 0;
15978   }else if( bCreate ){
15979     fts5CheckTransactionState(pTab, FTS5_BEGIN, 0);
15980   }
15981   *ppVTab = (sqlite3_vtab*)pTab;
15982   return rc;
15983 }
15984 
15985 /*
15986 ** The xConnect() and xCreate() methods for the virtual table. All the
15987 ** work is done in function fts5InitVtab().
15988 */
15989 static int fts5ConnectMethod(
15990   sqlite3 *db,                    /* Database connection */
15991   void *pAux,                     /* Pointer to tokenizer hash table */
15992   int argc,                       /* Number of elements in argv array */
15993   const char * const *argv,       /* xCreate/xConnect argument array */
15994   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
15995   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
15996 ){
15997   return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr);
15998 }
15999 static int fts5CreateMethod(
16000   sqlite3 *db,                    /* Database connection */
16001   void *pAux,                     /* Pointer to tokenizer hash table */
16002   int argc,                       /* Number of elements in argv array */
16003   const char * const *argv,       /* xCreate/xConnect argument array */
16004   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
16005   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
16006 ){
16007   return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr);
16008 }
16009 
16010 /*
16011 ** The different query plans.
16012 */
#define FTS5_PLAN_MATCH          1       /* (<tbl> MATCH ?) */
#define FTS5_PLAN_SOURCE         2       /* A source cursor for SORTED_MATCH */
#define FTS5_PLAN_SPECIAL        3       /* An internal query */
#define FTS5_PLAN_SORTED_MATCH   4       /* (<tbl> MATCH ? ORDER BY rank) */
#define FTS5_PLAN_SCAN           5       /* No usable constraint */
#define FTS5_PLAN_ROWID          6       /* (rowid = ?) */
/* The numeric values matter: fts5NextMethod() tests (ePlan<3) and asserts
** that this is true for FTS5_PLAN_MATCH and FTS5_PLAN_SOURCE only. */
16019 
16020 /*
16021 ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this
16022 ** extension is currently being used by a version of SQLite too old to
16023 ** support index-info flags. In that case this function is a no-op.
16024 */
16025 static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){
16026 #if SQLITE_VERSION_NUMBER>=3008012
16027 #ifndef SQLITE_CORE
16028   if( sqlite3_libversion_number()>=3008012 )
16029 #endif
16030   {
16031     pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE;
16032   }
16033 #endif
16034 }
16035 
16036 static int fts5UsePatternMatch(
16037   Fts5Config *pConfig,
16038   struct sqlite3_index_constraint *p
16039 ){
16040   assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB );
16041   assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE );
16042   if( pConfig->ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){
16043     return 1;
16044   }
16045   if( pConfig->ePattern==FTS5_PATTERN_LIKE
16046    && (p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB)
16047   ){
16048     return 1;
16049   }
16050   return 0;
16051 }
16052 
/*
** Implementation of the xBestIndex method for FTS5 tables. Within the
** WHERE constraint, it searches for the following:
**
**   1. A MATCH constraint against the table column.
**   2. A MATCH constraint against the "rank" column.
**   3. A MATCH constraint against some other column.
**   4. An == constraint against the rowid column.
**   5. A < or <= constraint against the rowid column.
**   6. A > or >= constraint against the rowid column.
**
** LIKE and GLOB constraints against ordinary columns are also used when
** fts5UsePatternMatch() reports that the table configuration permits it.
**
** Within the ORDER BY, the following are supported:
**
**   7. ORDER BY rank [ASC|DESC]
**   8. ORDER BY rowid [ASC|DESC]
**
** ORDER BY rank is only consumed if a MATCH constraint was also found.
**
** Information for the xFilter call is passed via both the idxNum and
** idxStr variables. Specifically, idxNum is a bitmask of the following
** flags used to encode the ORDER BY clause:
**
**     FTS5_BI_ORDER_RANK
**     FTS5_BI_ORDER_ROWID
**     FTS5_BI_ORDER_DESC
**
** idxStr is used to encode data from the WHERE clause. For each argument
** passed to the xFilter method, the following is appended to idxStr:
**
**   Match against table column:            "m"
**   Match against rank column:             "r"
**   Match against other column:            "M<column-number>"
**   LIKE  against other column:            "L<column-number>"
**   GLOB  against other column:            "G<column-number>"
**   Equality constraint against the rowid: "="
**   A < or <= against the rowid:           "<"
**   A > or >= against the rowid:           ">"
**
** NOTE(review): the code below never writes a lowercase "m". Every usable
** MATCH with a non-negative column other than the rank column is encoded
** as "M<column-number>", which presumably covers the table column as well.
** Confirm against the xFilter implementation.
**
** This function ensures that there is at most one "r" or "=". And that if
** there exists an "=" then there is no "<" or ">".
**
** Costs are assigned as follows:
**
**  a) If an unusable MATCH operator is present in the WHERE clause, the
**     cost is unconditionally set to 1e50 (a really big number).
**
**  b) If a MATCH operator is present, the cost depends on the other
**     constraints also present. As follows:
**
**       * No other constraints:         cost=1000.0
**       * One rowid range constraint:   cost=750.0
**       * Both rowid range constraints: cost=500.0
**       * An == rowid constraint:       cost=100.0
**
**  c) Otherwise, if there is no MATCH:
**
**       * No other constraints:         cost=1000000.0
**       * One rowid range constraint:   cost=750000.0
**       * Both rowid range constraints: cost=250000.0
**       * An == rowid constraint:       cost=10.0
**
** Costs are not modified by the ORDER BY clause.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if idxStr cannot be allocated,
** or SQLITE_ERROR (with an error message in the vtab) if pConfig->bLock is
** set when the method is invoked.
*/
static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
  Fts5Table *pTab = (Fts5Table*)pVTab;
  Fts5Config *pConfig = pTab->pConfig;
  const int nCol = pConfig->nCol;
  int idxFlags = 0;               /* Parameter passed through to xFilter() */
  int i;

  char *idxStr;                   /* Buffer for the idxStr encoding */
  int iIdxStr = 0;                /* Write offset relative to idxStr */
  int iCons = 0;                  /* Number of xFilter arguments so far */

  int bSeenEq = 0;                /* True once a rowid== term is used */
  int bSeenGt = 0;                /* True once a rowid> or >= term is used */
  int bSeenLt = 0;                /* True once a rowid< or <= term is used */
  int bSeenMatch = 0;             /* True once a column MATCH is used */
  int bSeenRank = 0;              /* True once a rank MATCH is used */


  assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH );
  assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH );
  assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH );
  assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH );
  assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH );

  /* bLock is incremented by fts5NextMethod() around its sqlite3_step() of
  ** the content statement. Planning a query while it is set is reported
  ** as a recursive table definition. */
  if( pConfig->bLock ){
    pTab->base.zErrMsg = sqlite3_mprintf(
        "recursively defined fts5 content table"
    );
    return SQLITE_ERROR;
  }

  /* Each constraint contributes at most one letter plus the text written by
  ** sqlite3_snprintf(6, ...), so 8 bytes per constraint is sufficient. The
  ** original pointer is saved in pInfo->idxStr (and marked for freeing)
  ** before the local copy is modified below. */
  idxStr = (char*)sqlite3_malloc(pInfo->nConstraint * 8 + 1);
  if( idxStr==0 ) return SQLITE_NOMEM;
  pInfo->idxStr = idxStr;
  pInfo->needToFreeIdxStr = 1;

  /* First pass: MATCH (and equivalent), LIKE/GLOB and rowid== terms. */
  for(i=0; i<pInfo->nConstraint; i++){
    struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
    int iCol = p->iColumn;
    if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH
     || (p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol>=nCol)
    ){
      /* A MATCH operator or equivalent */
      if( p->usable==0 || iCol<0 ){
        /* As there exists an unusable MATCH constraint this is an
        ** unusable plan. Set a prohibitively high cost. */
        pInfo->estimatedCost = 1e50;
        assert( iIdxStr < pInfo->nConstraint*6 + 1 );
        idxStr[iIdxStr] = 0;
        return SQLITE_OK;
      }else{
        if( iCol==nCol+1 ){
          /* MATCH against the rank column. Only the first one is used. */
          if( bSeenRank ) continue;
          idxStr[iIdxStr++] = 'r';
          bSeenRank = 1;
        }else if( iCol>=0 ){
          bSeenMatch = 1;
          idxStr[iIdxStr++] = 'M';
          sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol);
          /* The digits are skipped by advancing the base pointer rather
          ** than the index. idxStr[iIdxStr] therefore still addresses the
          ** next free byte (the nul terminator just written). */
          idxStr += strlen(&idxStr[iIdxStr]);
          assert( idxStr[iIdxStr]=='\0' );
        }
        pInfo->aConstraintUsage[i].argvIndex = ++iCons;
        pInfo->aConstraintUsage[i].omit = 1;
      }
    }else if( p->usable ){
      if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){
        /* LIKE or GLOB on an ordinary column. The omit flag is not set
        ** for these constraints. */
        assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB );
        idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE ? 'L' : 'G';
        sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol);
        /* Same base-pointer advance as for 'M' above. */
        idxStr += strlen(&idxStr[iIdxStr]);
        pInfo->aConstraintUsage[i].argvIndex = ++iCons;
        assert( idxStr[iIdxStr]=='\0' );
      }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol<0 ){
        /* First "rowid = ?" term. */
        idxStr[iIdxStr++] = '=';
        bSeenEq = 1;
        pInfo->aConstraintUsage[i].argvIndex = ++iCons;
      }
    }
  }

  /* Second pass: rowid range terms. These are only used when there is no
  ** rowid== term, and at most one upper and one lower bound is used. */
  if( bSeenEq==0 ){
    for(i=0; i<pInfo->nConstraint; i++){
      struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
      if( p->iColumn<0 && p->usable ){
        int op = p->op;
        if( op==SQLITE_INDEX_CONSTRAINT_LT || op==SQLITE_INDEX_CONSTRAINT_LE ){
          if( bSeenLt ) continue;
          idxStr[iIdxStr++] = '<';
          pInfo->aConstraintUsage[i].argvIndex = ++iCons;
          bSeenLt = 1;
        }else
        if( op==SQLITE_INDEX_CONSTRAINT_GT || op==SQLITE_INDEX_CONSTRAINT_GE ){
          if( bSeenGt ) continue;
          idxStr[iIdxStr++] = '>';
          pInfo->aConstraintUsage[i].argvIndex = ++iCons;
          bSeenGt = 1;
        }
      }
    }
  }
  idxStr[iIdxStr] = '\0';

  /* Set idxFlags flags for the ORDER BY clause */
  if( pInfo->nOrderBy==1 ){
    int iSort = pInfo->aOrderBy[0].iColumn;
    if( iSort==(pConfig->nCol+1) && bSeenMatch ){
      idxFlags |= FTS5_BI_ORDER_RANK;
    }else if( iSort==-1 ){
      idxFlags |= FTS5_BI_ORDER_ROWID;
    }
    if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){
      pInfo->orderByConsumed = 1;
      if( pInfo->aOrderBy[0].desc ){
        idxFlags |= FTS5_BI_ORDER_DESC;
      }
    }
  }

  /* Calculate the estimated cost based on the constraints that were seen
  ** (the bSeenXXX variables). */
  if( bSeenEq ){
    pInfo->estimatedCost = bSeenMatch ? 100.0 : 10.0;
    if( bSeenMatch==0 ) fts5SetUniqueFlag(pInfo);
  }else if( bSeenLt && bSeenGt ){
    pInfo->estimatedCost = bSeenMatch ? 500.0 : 250000.0;
  }else if( bSeenLt || bSeenGt ){
    pInfo->estimatedCost = bSeenMatch ? 750.0 : 750000.0;
  }else{
    pInfo->estimatedCost = bSeenMatch ? 1000.0 : 1000000.0;
  }

  pInfo->idxNum = idxFlags;
  return SQLITE_OK;
}
16248 
16249 static int fts5NewTransaction(Fts5FullTable *pTab){
16250   Fts5Cursor *pCsr;
16251   for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
16252     if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK;
16253   }
16254   return sqlite3Fts5StorageReset(pTab->pStorage);
16255 }
16256 
16257 /*
16258 ** Implementation of xOpen method.
16259 */
16260 static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){
16261   Fts5FullTable *pTab = (Fts5FullTable*)pVTab;
16262   Fts5Config *pConfig = pTab->p.pConfig;
16263   Fts5Cursor *pCsr = 0;           /* New cursor object */
16264   sqlite3_int64 nByte;            /* Bytes of space to allocate */
16265   int rc;                         /* Return code */
16266 
16267   rc = fts5NewTransaction(pTab);
16268   if( rc==SQLITE_OK ){
16269     nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int);
16270     pCsr = (Fts5Cursor*)sqlite3_malloc64(nByte);
16271     if( pCsr ){
16272       Fts5Global *pGlobal = pTab->pGlobal;
16273       memset(pCsr, 0, (size_t)nByte);
16274       pCsr->aColumnSize = (int*)&pCsr[1];
16275       pCsr->pNext = pGlobal->pCsr;
16276       pGlobal->pCsr = pCsr;
16277       pCsr->iCsrId = ++pGlobal->iNextId;
16278     }else{
16279       rc = SQLITE_NOMEM;
16280     }
16281   }
16282   *ppCsr = (sqlite3_vtab_cursor*)pCsr;
16283   return rc;
16284 }
16285 
16286 static int fts5StmtType(Fts5Cursor *pCsr){
16287   if( pCsr->ePlan==FTS5_PLAN_SCAN ){
16288     return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC;
16289   }
16290   return FTS5_STMT_LOOKUP;
16291 }
16292 
16293 /*
16294 ** This function is called after the cursor passed as the only argument
16295 ** is moved to point at a different row. It clears all cached data
16296 ** specific to the previous row stored by the cursor object.
16297 */
16298 static void fts5CsrNewrow(Fts5Cursor *pCsr){
16299   CsrFlagSet(pCsr,
16300       FTS5CSR_REQUIRE_CONTENT
16301     | FTS5CSR_REQUIRE_DOCSIZE
16302     | FTS5CSR_REQUIRE_INST
16303     | FTS5CSR_REQUIRE_POSLIST
16304   );
16305 }
16306 
16307 static void fts5FreeCursorComponents(Fts5Cursor *pCsr){
16308   Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
16309   Fts5Auxdata *pData;
16310   Fts5Auxdata *pNext;
16311 
16312   sqlite3_free(pCsr->aInstIter);
16313   sqlite3_free(pCsr->aInst);
16314   if( pCsr->pStmt ){
16315     int eStmt = fts5StmtType(pCsr);
16316     sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
16317   }
16318   if( pCsr->pSorter ){
16319     Fts5Sorter *pSorter = pCsr->pSorter;
16320     sqlite3_finalize(pSorter->pStmt);
16321     sqlite3_free(pSorter);
16322   }
16323 
16324   if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){
16325     sqlite3Fts5ExprFree(pCsr->pExpr);
16326   }
16327 
16328   for(pData=pCsr->pAuxdata; pData; pData=pNext){
16329     pNext = pData->pNext;
16330     if( pData->xDelete ) pData->xDelete(pData->pPtr);
16331     sqlite3_free(pData);
16332   }
16333 
16334   sqlite3_finalize(pCsr->pRankArgStmt);
16335   sqlite3_free(pCsr->apRankArg);
16336 
16337   if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){
16338     sqlite3_free(pCsr->zRank);
16339     sqlite3_free(pCsr->zRankArgs);
16340   }
16341 
16342   sqlite3Fts5IndexCloseReader(pTab->p.pIndex);
16343   memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr));
16344 }
16345 
16346 
16347 /*
16348 ** Close the cursor.  For additional information see the documentation
16349 ** on the xClose method of the virtual table interface.
16350 */
16351 static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){
16352   if( pCursor ){
16353     Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
16354     Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16355     Fts5Cursor **pp;
16356 
16357     fts5FreeCursorComponents(pCsr);
16358     /* Remove the cursor from the Fts5Global.pCsr list */
16359     for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
16360     *pp = pCsr->pNext;
16361 
16362     sqlite3_free(pCsr);
16363   }
16364   return SQLITE_OK;
16365 }
16366 
16367 static int fts5SorterNext(Fts5Cursor *pCsr){
16368   Fts5Sorter *pSorter = pCsr->pSorter;
16369   int rc;
16370 
16371   rc = sqlite3_step(pSorter->pStmt);
16372   if( rc==SQLITE_DONE ){
16373     rc = SQLITE_OK;
16374     CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT);
16375   }else if( rc==SQLITE_ROW ){
16376     const u8 *a;
16377     const u8 *aBlob;
16378     int nBlob;
16379     int i;
16380     int iOff = 0;
16381     rc = SQLITE_OK;
16382 
16383     pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0);
16384     nBlob = sqlite3_column_bytes(pSorter->pStmt, 1);
16385     aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1);
16386 
16387     /* nBlob==0 in detail=none mode. */
16388     if( nBlob>0 ){
16389       for(i=0; i<(pSorter->nIdx-1); i++){
16390         int iVal;
16391         a += fts5GetVarint32(a, iVal);
16392         iOff += iVal;
16393         pSorter->aIdx[i] = iOff;
16394       }
16395       pSorter->aIdx[i] = &aBlob[nBlob] - a;
16396       pSorter->aPoslist = a;
16397     }
16398 
16399     fts5CsrNewrow(pCsr);
16400   }
16401 
16402   return rc;
16403 }
16404 
16405 
16406 /*
16407 ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors
16408 ** open on table pTab.
16409 */
16410 static void fts5TripCursors(Fts5FullTable *pTab){
16411   Fts5Cursor *pCsr;
16412   for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
16413     if( pCsr->ePlan==FTS5_PLAN_MATCH
16414      && pCsr->base.pVtab==(sqlite3_vtab*)pTab
16415     ){
16416       CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK);
16417     }
16418   }
16419 }
16420 
16421 /*
16422 ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first
16423 ** argument, close and reopen all Fts5IndexIter iterators that the cursor
16424 ** is using. Then attempt to move the cursor to a rowid equal to or laster
16425 ** (in the cursors sort order - ASC or DESC) than the current rowid.
16426 **
16427 ** If the new rowid is not equal to the old, set output parameter *pbSkip
16428 ** to 1 before returning. Otherwise, leave it unchanged.
16429 **
16430 ** Return SQLITE_OK if successful or if no reseek was required, or an
16431 ** error code if an error occurred.
16432 */
16433 static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){
16434   int rc = SQLITE_OK;
16435   assert( *pbSkip==0 );
16436   if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){
16437     Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
16438     int bDesc = pCsr->bDesc;
16439     i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr);
16440 
16441     rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc);
16442     if( rc==SQLITE_OK &&  iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){
16443       *pbSkip = 1;
16444     }
16445 
16446     CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK);
16447     fts5CsrNewrow(pCsr);
16448     if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
16449       CsrFlagSet(pCsr, FTS5CSR_EOF);
16450       *pbSkip = 1;
16451     }
16452   }
16453   return rc;
16454 }
16455 
16456 
16457 /*
16458 ** Advance the cursor to the next row in the table that matches the
16459 ** search criteria.
16460 **
16461 ** Return SQLITE_OK if nothing goes wrong.  SQLITE_OK is returned
16462 ** even if we reach end-of-file.  The fts5EofMethod() will be called
16463 ** subsequently to determine whether or not an EOF was hit.
16464 */
16465 static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){
16466   Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16467   int rc;
16468 
16469   assert( (pCsr->ePlan<3)==
16470           (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE)
16471   );
16472   assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) );
16473 
16474   if( pCsr->ePlan<3 ){
16475     int bSkip = 0;
16476     if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc;
16477     rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid);
16478     CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr));
16479     fts5CsrNewrow(pCsr);
16480   }else{
16481     switch( pCsr->ePlan ){
16482       case FTS5_PLAN_SPECIAL: {
16483         CsrFlagSet(pCsr, FTS5CSR_EOF);
16484         rc = SQLITE_OK;
16485         break;
16486       }
16487 
16488       case FTS5_PLAN_SORTED_MATCH: {
16489         rc = fts5SorterNext(pCsr);
16490         break;
16491       }
16492 
16493       default: {
16494         Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig;
16495         pConfig->bLock++;
16496         rc = sqlite3_step(pCsr->pStmt);
16497         pConfig->bLock--;
16498         if( rc!=SQLITE_ROW ){
16499           CsrFlagSet(pCsr, FTS5CSR_EOF);
16500           rc = sqlite3_reset(pCsr->pStmt);
16501           if( rc!=SQLITE_OK ){
16502             pCursor->pVtab->zErrMsg = sqlite3_mprintf(
16503                 "%s", sqlite3_errmsg(pConfig->db)
16504             );
16505           }
16506         }else{
16507           rc = SQLITE_OK;
16508         }
16509         break;
16510       }
16511     }
16512   }
16513 
16514   return rc;
16515 }
16516 
16517 
16518 static int fts5PrepareStatement(
16519   sqlite3_stmt **ppStmt,
16520   Fts5Config *pConfig,
16521   const char *zFmt,
16522   ...
16523 ){
16524   sqlite3_stmt *pRet = 0;
16525   int rc;
16526   char *zSql;
16527   va_list ap;
16528 
16529   va_start(ap, zFmt);
16530   zSql = sqlite3_vmprintf(zFmt, ap);
16531   if( zSql==0 ){
16532     rc = SQLITE_NOMEM;
16533   }else{
16534     rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
16535                             SQLITE_PREPARE_PERSISTENT, &pRet, 0);
16536     if( rc!=SQLITE_OK ){
16537       *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db));
16538     }
16539     sqlite3_free(zSql);
16540   }
16541 
16542   va_end(ap);
16543   *ppStmt = pRet;
16544   return rc;
16545 }
16546 
16547 static int fts5CursorFirstSorted(
16548   Fts5FullTable *pTab,
16549   Fts5Cursor *pCsr,
16550   int bDesc
16551 ){
16552   Fts5Config *pConfig = pTab->p.pConfig;
16553   Fts5Sorter *pSorter;
16554   int nPhrase;
16555   sqlite3_int64 nByte;
16556   int rc;
16557   const char *zRank = pCsr->zRank;
16558   const char *zRankArgs = pCsr->zRankArgs;
16559 
16560   nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
16561   nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1);
16562   pSorter = (Fts5Sorter*)sqlite3_malloc64(nByte);
16563   if( pSorter==0 ) return SQLITE_NOMEM;
16564   memset(pSorter, 0, (size_t)nByte);
16565   pSorter->nIdx = nPhrase;
16566 
16567   /* TODO: It would be better to have some system for reusing statement
16568   ** handles here, rather than preparing a new one for each query. But that
16569   ** is not possible as SQLite reference counts the virtual table objects.
16570   ** And since the statement required here reads from this very virtual
16571   ** table, saving it creates a circular reference.
16572   **
16573   ** If SQLite a built-in statement cache, this wouldn't be a problem. */
16574   rc = fts5PrepareStatement(&pSorter->pStmt, pConfig,
16575       "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s",
16576       pConfig->zDb, pConfig->zName, zRank, pConfig->zName,
16577       (zRankArgs ? ", " : ""),
16578       (zRankArgs ? zRankArgs : ""),
16579       bDesc ? "DESC" : "ASC"
16580   );
16581 
16582   pCsr->pSorter = pSorter;
16583   if( rc==SQLITE_OK ){
16584     assert( pTab->pSortCsr==0 );
16585     pTab->pSortCsr = pCsr;
16586     rc = fts5SorterNext(pCsr);
16587     pTab->pSortCsr = 0;
16588   }
16589 
16590   if( rc!=SQLITE_OK ){
16591     sqlite3_finalize(pSorter->pStmt);
16592     sqlite3_free(pSorter);
16593     pCsr->pSorter = 0;
16594   }
16595 
16596   return rc;
16597 }
16598 
16599 static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){
16600   int rc;
16601   Fts5Expr *pExpr = pCsr->pExpr;
16602   rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc);
16603   if( sqlite3Fts5ExprEof(pExpr) ){
16604     CsrFlagSet(pCsr, FTS5CSR_EOF);
16605   }
16606   fts5CsrNewrow(pCsr);
16607   return rc;
16608 }
16609 
16610 /*
16611 ** Process a "special" query. A special query is identified as one with a
16612 ** MATCH expression that begins with a '*' character. The remainder of
16613 ** the text passed to the MATCH operator are used as  the special query
16614 ** parameters.
16615 */
16616 static int fts5SpecialMatch(
16617   Fts5FullTable *pTab,
16618   Fts5Cursor *pCsr,
16619   const char *zQuery
16620 ){
16621   int rc = SQLITE_OK;             /* Return code */
16622   const char *z = zQuery;         /* Special query text */
16623   int n;                          /* Number of bytes in text at z */
16624 
16625   while( z[0]==' ' ) z++;
16626   for(n=0; z[n] && z[n]!=' '; n++);
16627 
16628   assert( pTab->p.base.zErrMsg==0 );
16629   pCsr->ePlan = FTS5_PLAN_SPECIAL;
16630 
16631   if( n==5 && 0==sqlite3_strnicmp("reads", z, n) ){
16632     pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex);
16633   }
16634   else if( n==2 && 0==sqlite3_strnicmp("id", z, n) ){
16635     pCsr->iSpecial = pCsr->iCsrId;
16636   }
16637   else{
16638     /* An unrecognized directive. Return an error message. */
16639     pTab->p.base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z);
16640     rc = SQLITE_ERROR;
16641   }
16642 
16643   return rc;
16644 }
16645 
16646 /*
16647 ** Search for an auxiliary function named zName that can be used with table
16648 ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary
16649 ** structure. Otherwise, if no such function exists, return NULL.
16650 */
16651 static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){
16652   Fts5Auxiliary *pAux;
16653 
16654   for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){
16655     if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux;
16656   }
16657 
16658   /* No function of the specified name was found. Return 0. */
16659   return 0;
16660 }
16661 
16662 
16663 static int fts5FindRankFunction(Fts5Cursor *pCsr){
16664   Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
16665   Fts5Config *pConfig = pTab->p.pConfig;
16666   int rc = SQLITE_OK;
16667   Fts5Auxiliary *pAux = 0;
16668   const char *zRank = pCsr->zRank;
16669   const char *zRankArgs = pCsr->zRankArgs;
16670 
16671   if( zRankArgs ){
16672     char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs);
16673     if( zSql ){
16674       sqlite3_stmt *pStmt = 0;
16675       rc = sqlite3_prepare_v3(pConfig->db, zSql, -1,
16676                               SQLITE_PREPARE_PERSISTENT, &pStmt, 0);
16677       sqlite3_free(zSql);
16678       assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 );
16679       if( rc==SQLITE_OK ){
16680         if( SQLITE_ROW==sqlite3_step(pStmt) ){
16681           sqlite3_int64 nByte;
16682           pCsr->nRankArg = sqlite3_column_count(pStmt);
16683           nByte = sizeof(sqlite3_value*)*pCsr->nRankArg;
16684           pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte);
16685           if( rc==SQLITE_OK ){
16686             int i;
16687             for(i=0; i<pCsr->nRankArg; i++){
16688               pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i);
16689             }
16690           }
16691           pCsr->pRankArgStmt = pStmt;
16692         }else{
16693           rc = sqlite3_finalize(pStmt);
16694           assert( rc!=SQLITE_OK );
16695         }
16696       }
16697     }
16698   }
16699 
16700   if( rc==SQLITE_OK ){
16701     pAux = fts5FindAuxiliary(pTab, zRank);
16702     if( pAux==0 ){
16703       assert( pTab->p.base.zErrMsg==0 );
16704       pTab->p.base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank);
16705       rc = SQLITE_ERROR;
16706     }
16707   }
16708 
16709   pCsr->pRank = pAux;
16710   return rc;
16711 }
16712 
16713 
16714 static int fts5CursorParseRank(
16715   Fts5Config *pConfig,
16716   Fts5Cursor *pCsr,
16717   sqlite3_value *pRank
16718 ){
16719   int rc = SQLITE_OK;
16720   if( pRank ){
16721     const char *z = (const char*)sqlite3_value_text(pRank);
16722     char *zRank = 0;
16723     char *zRankArgs = 0;
16724 
16725     if( z==0 ){
16726       if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR;
16727     }else{
16728       rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs);
16729     }
16730     if( rc==SQLITE_OK ){
16731       pCsr->zRank = zRank;
16732       pCsr->zRankArgs = zRankArgs;
16733       CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK);
16734     }else if( rc==SQLITE_ERROR ){
16735       pCsr->base.pVtab->zErrMsg = sqlite3_mprintf(
16736           "parse error in rank function: %s", z
16737       );
16738     }
16739   }else{
16740     if( pConfig->zRank ){
16741       pCsr->zRank = (char*)pConfig->zRank;
16742       pCsr->zRankArgs = (char*)pConfig->zRankArgs;
16743     }else{
16744       pCsr->zRank = (char*)FTS5_DEFAULT_RANK;
16745       pCsr->zRankArgs = 0;
16746     }
16747   }
16748   return rc;
16749 }
16750 
16751 static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){
16752   if( pVal ){
16753     int eType = sqlite3_value_numeric_type(pVal);
16754     if( eType==SQLITE_INTEGER ){
16755       return sqlite3_value_int64(pVal);
16756     }
16757   }
16758   return iDefault;
16759 }
16760 
16761 /*
16762 ** This is the xFilter interface for the virtual table.  See
16763 ** the virtual table xFilter method documentation for additional
16764 ** information.
16765 **
16766 ** There are three possible query strategies:
16767 **
16768 **   1. Full-text search using a MATCH operator.
16769 **   2. A by-rowid lookup.
16770 **   3. A full-table scan.
16771 */
16772 static int fts5FilterMethod(
16773   sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
16774   int idxNum,                     /* Strategy index */
16775   const char *idxStr,             /* Unused */
16776   int nVal,                       /* Number of elements in apVal */
16777   sqlite3_value **apVal           /* Arguments for the indexing scheme */
16778 ){
16779   Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
16780   Fts5Config *pConfig = pTab->p.pConfig;
16781   Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16782   int rc = SQLITE_OK;             /* Error code */
16783   int bDesc;                      /* True if ORDER BY [rank|rowid] DESC */
16784   int bOrderByRank;               /* True if ORDER BY rank */
16785   sqlite3_value *pRank = 0;       /* rank MATCH ? expression (or NULL) */
16786   sqlite3_value *pRowidEq = 0;    /* rowid = ? expression (or NULL) */
16787   sqlite3_value *pRowidLe = 0;    /* rowid <= ? expression (or NULL) */
16788   sqlite3_value *pRowidGe = 0;    /* rowid >= ? expression (or NULL) */
16789   int iCol;                       /* Column on LHS of MATCH operator */
16790   char **pzErrmsg = pConfig->pzErrmsg;
16791   int i;
16792   int iIdxStr = 0;
16793   Fts5Expr *pExpr = 0;
16794 
16795   if( pConfig->bLock ){
16796     pTab->p.base.zErrMsg = sqlite3_mprintf(
16797         "recursively defined fts5 content table"
16798     );
16799     return SQLITE_ERROR;
16800   }
16801 
16802   if( pCsr->ePlan ){
16803     fts5FreeCursorComponents(pCsr);
16804     memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr));
16805   }
16806 
16807   assert( pCsr->pStmt==0 );
16808   assert( pCsr->pExpr==0 );
16809   assert( pCsr->csrflags==0 );
16810   assert( pCsr->pRank==0 );
16811   assert( pCsr->zRank==0 );
16812   assert( pCsr->zRankArgs==0 );
16813   assert( pTab->pSortCsr==0 || nVal==0 );
16814 
16815   assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg );
16816   pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
16817 
16818   /* Decode the arguments passed through to this function. */
16819   for(i=0; i<nVal; i++){
16820     switch( idxStr[iIdxStr++] ){
16821       case 'r':
16822         pRank = apVal[i];
16823         break;
16824       case 'M': {
16825         const char *zText = (const char*)sqlite3_value_text(apVal[i]);
16826         if( zText==0 ) zText = "";
16827         iCol = 0;
16828         do{
16829           iCol = iCol*10 + (idxStr[iIdxStr]-'0');
16830           iIdxStr++;
16831         }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
16832 
16833         if( zText[0]=='*' ){
16834           /* The user has issued a query of the form "MATCH '*...'". This
16835           ** indicates that the MATCH expression is not a full text query,
16836           ** but a request for an internal parameter.  */
16837           rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);
16838           goto filter_out;
16839         }else{
16840           char **pzErr = &pTab->p.base.zErrMsg;
16841           rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
16842           if( rc==SQLITE_OK ){
16843             rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
16844             pExpr = 0;
16845           }
16846           if( rc!=SQLITE_OK ) goto filter_out;
16847         }
16848 
16849         break;
16850       }
16851       case 'L':
16852       case 'G': {
16853         int bGlob = (idxStr[iIdxStr-1]=='G');
16854         const char *zText = (const char*)sqlite3_value_text(apVal[i]);
16855         iCol = 0;
16856         do{
16857           iCol = iCol*10 + (idxStr[iIdxStr]-'0');
16858           iIdxStr++;
16859         }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' );
16860         if( zText ){
16861           rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr);
16862         }
16863         if( rc==SQLITE_OK ){
16864           rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
16865           pExpr = 0;
16866         }
16867         if( rc!=SQLITE_OK ) goto filter_out;
16868         break;
16869       }
16870       case '=':
16871         pRowidEq = apVal[i];
16872         break;
16873       case '<':
16874         pRowidLe = apVal[i];
16875         break;
16876       default: assert( idxStr[iIdxStr-1]=='>' );
16877         pRowidGe = apVal[i];
16878         break;
16879     }
16880   }
16881   bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0);
16882   pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0);
16883 
16884   /* Set the cursor upper and lower rowid limits. Only some strategies
16885   ** actually use them. This is ok, as the xBestIndex() method leaves the
16886   ** sqlite3_index_constraint.omit flag clear for range constraints
16887   ** on the rowid field.  */
16888   if( pRowidEq ){
16889     pRowidLe = pRowidGe = pRowidEq;
16890   }
16891   if( bDesc ){
16892     pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
16893     pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
16894   }else{
16895     pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64);
16896     pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64);
16897   }
16898 
16899   if( pTab->pSortCsr ){
16900     /* If pSortCsr is non-NULL, then this call is being made as part of
16901     ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is
16902     ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will
16903     ** return results to the user for this query. The current cursor
16904     ** (pCursor) is used to execute the query issued by function
16905     ** fts5CursorFirstSorted() above.  */
16906     assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 );
16907     assert( nVal==0 && bOrderByRank==0 && bDesc==0 );
16908     assert( pCsr->iLastRowid==LARGEST_INT64 );
16909     assert( pCsr->iFirstRowid==SMALLEST_INT64 );
16910     if( pTab->pSortCsr->bDesc ){
16911       pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid;
16912       pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid;
16913     }else{
16914       pCsr->iLastRowid = pTab->pSortCsr->iLastRowid;
16915       pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid;
16916     }
16917     pCsr->ePlan = FTS5_PLAN_SOURCE;
16918     pCsr->pExpr = pTab->pSortCsr->pExpr;
16919     rc = fts5CursorFirst(pTab, pCsr, bDesc);
16920   }else if( pCsr->pExpr ){
16921     rc = fts5CursorParseRank(pConfig, pCsr, pRank);
16922     if( rc==SQLITE_OK ){
16923       if( bOrderByRank ){
16924         pCsr->ePlan = FTS5_PLAN_SORTED_MATCH;
16925         rc = fts5CursorFirstSorted(pTab, pCsr, bDesc);
16926       }else{
16927         pCsr->ePlan = FTS5_PLAN_MATCH;
16928         rc = fts5CursorFirst(pTab, pCsr, bDesc);
16929       }
16930     }
16931   }else if( pConfig->zContent==0 ){
16932     *pConfig->pzErrmsg = sqlite3_mprintf(
16933         "%s: table does not support scanning", pConfig->zName
16934     );
16935     rc = SQLITE_ERROR;
16936   }else{
16937     /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup
16938     ** by rowid (ePlan==FTS5_PLAN_ROWID).  */
16939     pCsr->ePlan = (pRowidEq ? FTS5_PLAN_ROWID : FTS5_PLAN_SCAN);
16940     rc = sqlite3Fts5StorageStmt(
16941         pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg
16942     );
16943     if( rc==SQLITE_OK ){
16944       if( pRowidEq!=0 ){
16945         assert( pCsr->ePlan==FTS5_PLAN_ROWID );
16946         sqlite3_bind_value(pCsr->pStmt, 1, pRowidEq);
16947       }else{
16948         sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid);
16949         sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid);
16950       }
16951       rc = fts5NextMethod(pCursor);
16952     }
16953   }
16954 
16955  filter_out:
16956   sqlite3Fts5ExprFree(pExpr);
16957   pConfig->pzErrmsg = pzErrmsg;
16958   return rc;
16959 }
16960 
16961 /*
16962 ** This is the xEof method of the virtual table. SQLite calls this
16963 ** routine to find out if it has reached the end of a result set.
16964 */
16965 static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){
16966   Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16967   return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0);
16968 }
16969 
16970 /*
16971 ** Return the rowid that the cursor currently points to.
16972 */
16973 static i64 fts5CursorRowid(Fts5Cursor *pCsr){
16974   assert( pCsr->ePlan==FTS5_PLAN_MATCH
16975        || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
16976        || pCsr->ePlan==FTS5_PLAN_SOURCE
16977   );
16978   if( pCsr->pSorter ){
16979     return pCsr->pSorter->iRowid;
16980   }else{
16981     return sqlite3Fts5ExprRowid(pCsr->pExpr);
16982   }
16983 }
16984 
16985 /*
16986 ** This is the xRowid method. The SQLite core calls this routine to
16987 ** retrieve the rowid for the current row of the result set. fts5
16988 ** exposes %_content.rowid as the rowid for the virtual table. The
16989 ** rowid should be written to *pRowid.
16990 */
16991 static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
16992   Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
16993   int ePlan = pCsr->ePlan;
16994 
16995   assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
16996   switch( ePlan ){
16997     case FTS5_PLAN_SPECIAL:
16998       *pRowid = 0;
16999       break;
17000 
17001     case FTS5_PLAN_SOURCE:
17002     case FTS5_PLAN_MATCH:
17003     case FTS5_PLAN_SORTED_MATCH:
17004       *pRowid = fts5CursorRowid(pCsr);
17005       break;
17006 
17007     default:
17008       *pRowid = sqlite3_column_int64(pCsr->pStmt, 0);
17009       break;
17010   }
17011 
17012   return SQLITE_OK;
17013 }
17014 
17015 /*
17016 ** If the cursor requires seeking (bSeekRequired flag is set), seek it.
17017 ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise.
17018 **
17019 ** If argument bErrormsg is true and an error occurs, an error message may
17020 ** be left in sqlite3_vtab.zErrMsg.
17021 */
17022 static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){
17023   int rc = SQLITE_OK;
17024 
17025   /* If the cursor does not yet have a statement handle, obtain one now. */
17026   if( pCsr->pStmt==0 ){
17027     Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17028     int eStmt = fts5StmtType(pCsr);
17029     rc = sqlite3Fts5StorageStmt(
17030         pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0)
17031     );
17032     assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 );
17033     assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) );
17034   }
17035 
17036   if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){
17037     Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
17038     assert( pCsr->pExpr );
17039     sqlite3_reset(pCsr->pStmt);
17040     sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr));
17041     pTab->pConfig->bLock++;
17042     rc = sqlite3_step(pCsr->pStmt);
17043     pTab->pConfig->bLock--;
17044     if( rc==SQLITE_ROW ){
17045       rc = SQLITE_OK;
17046       CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT);
17047     }else{
17048       rc = sqlite3_reset(pCsr->pStmt);
17049       if( rc==SQLITE_OK ){
17050         rc = FTS5_CORRUPT;
17051       }else if( pTab->pConfig->pzErrmsg ){
17052         *pTab->pConfig->pzErrmsg = sqlite3_mprintf(
17053             "%s", sqlite3_errmsg(pTab->pConfig->db)
17054         );
17055       }
17056     }
17057   }
17058   return rc;
17059 }
17060 
17061 static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){
17062   va_list ap;                     /* ... printf arguments */
17063   va_start(ap, zFormat);
17064   assert( p->p.base.zErrMsg==0 );
17065   p->p.base.zErrMsg = sqlite3_vmprintf(zFormat, ap);
17066   va_end(ap);
17067 }
17068 
17069 /*
17070 ** This function is called to handle an FTS INSERT command. In other words,
17071 ** an INSERT statement of the form:
17072 **
17073 **     INSERT INTO fts(fts) VALUES($pCmd)
17074 **     INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal)
17075 **
17076 ** Argument pVal is the value assigned to column "fts" by the INSERT
17077 ** statement. This function returns SQLITE_OK if successful, or an SQLite
17078 ** error code if an error occurs.
17079 **
17080 ** The commands implemented by this function are documented in the "Special
17081 ** INSERT Directives" section of the documentation. It should be updated if
17082 ** more commands are added to this function.
17083 */
17084 static int fts5SpecialInsert(
17085   Fts5FullTable *pTab,            /* Fts5 table object */
17086   const char *zCmd,               /* Text inserted into table-name column */
17087   sqlite3_value *pVal             /* Value inserted into rank column */
17088 ){
17089   Fts5Config *pConfig = pTab->p.pConfig;
17090   int rc = SQLITE_OK;
17091   int bError = 0;
17092 
17093   if( 0==sqlite3_stricmp("delete-all", zCmd) ){
17094     if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
17095       fts5SetVtabError(pTab,
17096           "'delete-all' may only be used with a "
17097           "contentless or external content fts5 table"
17098       );
17099       rc = SQLITE_ERROR;
17100     }else{
17101       rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage);
17102     }
17103   }else if( 0==sqlite3_stricmp("rebuild", zCmd) ){
17104     if( pConfig->eContent==FTS5_CONTENT_NONE ){
17105       fts5SetVtabError(pTab,
17106           "'rebuild' may not be used with a contentless fts5 table"
17107       );
17108       rc = SQLITE_ERROR;
17109     }else{
17110       rc = sqlite3Fts5StorageRebuild(pTab->pStorage);
17111     }
17112   }else if( 0==sqlite3_stricmp("optimize", zCmd) ){
17113     rc = sqlite3Fts5StorageOptimize(pTab->pStorage);
17114   }else if( 0==sqlite3_stricmp("merge", zCmd) ){
17115     int nMerge = sqlite3_value_int(pVal);
17116     rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge);
17117   }else if( 0==sqlite3_stricmp("integrity-check", zCmd) ){
17118     int iArg = sqlite3_value_int(pVal);
17119     rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg);
17120 #ifdef SQLITE_DEBUG
17121   }else if( 0==sqlite3_stricmp("prefix-index", zCmd) ){
17122     pConfig->bPrefixIndex = sqlite3_value_int(pVal);
17123 #endif
17124   }else{
17125     rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex);
17126     if( rc==SQLITE_OK ){
17127       rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError);
17128     }
17129     if( rc==SQLITE_OK ){
17130       if( bError ){
17131         rc = SQLITE_ERROR;
17132       }else{
17133         rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0);
17134       }
17135     }
17136   }
17137   return rc;
17138 }
17139 
17140 static int fts5SpecialDelete(
17141   Fts5FullTable *pTab,
17142   sqlite3_value **apVal
17143 ){
17144   int rc = SQLITE_OK;
17145   int eType1 = sqlite3_value_type(apVal[1]);
17146   if( eType1==SQLITE_INTEGER ){
17147     sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]);
17148     rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]);
17149   }
17150   return rc;
17151 }
17152 
17153 static void fts5StorageInsert(
17154   int *pRc,
17155   Fts5FullTable *pTab,
17156   sqlite3_value **apVal,
17157   i64 *piRowid
17158 ){
17159   int rc = *pRc;
17160   if( rc==SQLITE_OK ){
17161     rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid);
17162   }
17163   if( rc==SQLITE_OK ){
17164     rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid);
17165   }
17166   *pRc = rc;
17167 }
17168 
17169 /*
17170 ** This function is the implementation of the xUpdate callback used by
17171 ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be
17172 ** inserted, updated or deleted.
17173 **
17174 ** A delete specifies a single argument - the rowid of the row to remove.
17175 **
17176 ** Update and insert operations pass:
17177 **
17178 **   1. The "old" rowid, or NULL.
17179 **   2. The "new" rowid.
17180 **   3. Values for each of the nCol matchable columns.
17181 **   4. Values for the two hidden columns (<tablename> and "rank").
17182 */
17183 static int fts5UpdateMethod(
17184   sqlite3_vtab *pVtab,            /* Virtual table handle */
17185   int nArg,                       /* Size of argument array */
17186   sqlite3_value **apVal,          /* Array of arguments */
17187   sqlite_int64 *pRowid            /* OUT: The affected (or effected) rowid */
17188 ){
17189   Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
17190   Fts5Config *pConfig = pTab->p.pConfig;
17191   int eType0;                     /* value_type() of apVal[0] */
17192   int rc = SQLITE_OK;             /* Return code */
17193 
17194   /* A transaction must be open when this is called. */
17195   assert( pTab->ts.eState==1 );
17196 
17197   assert( pVtab->zErrMsg==0 );
17198   assert( nArg==1 || nArg==(2+pConfig->nCol+2) );
17199   assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER
17200        || sqlite3_value_type(apVal[0])==SQLITE_NULL
17201   );
17202   assert( pTab->p.pConfig->pzErrmsg==0 );
17203   pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
17204 
17205   /* Put any active cursors into REQUIRE_SEEK state. */
17206   fts5TripCursors(pTab);
17207 
17208   eType0 = sqlite3_value_type(apVal[0]);
17209   if( eType0==SQLITE_NULL
17210    && sqlite3_value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL
17211   ){
17212     /* A "special" INSERT op. These are handled separately. */
17213     const char *z = (const char*)sqlite3_value_text(apVal[2+pConfig->nCol]);
17214     if( pConfig->eContent!=FTS5_CONTENT_NORMAL
17215       && 0==sqlite3_stricmp("delete", z)
17216     ){
17217       rc = fts5SpecialDelete(pTab, apVal);
17218     }else{
17219       rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]);
17220     }
17221   }else{
17222     /* A regular INSERT, UPDATE or DELETE statement. The trick here is that
17223     ** any conflict on the rowid value must be detected before any
17224     ** modifications are made to the database file. There are 4 cases:
17225     **
17226     **   1) DELETE
17227     **   2) UPDATE (rowid not modified)
17228     **   3) UPDATE (rowid modified)
17229     **   4) INSERT
17230     **
17231     ** Cases 3 and 4 may violate the rowid constraint.
17232     */
17233     int eConflict = SQLITE_ABORT;
17234     if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
17235       eConflict = sqlite3_vtab_on_conflict(pConfig->db);
17236     }
17237 
17238     assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL );
17239     assert( nArg!=1 || eType0==SQLITE_INTEGER );
17240 
17241     /* Filter out attempts to run UPDATE or DELETE on contentless tables.
17242     ** This is not suported.  */
17243     if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){
17244       pTab->p.base.zErrMsg = sqlite3_mprintf(
17245           "cannot %s contentless fts5 table: %s",
17246           (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName
17247       );
17248       rc = SQLITE_ERROR;
17249     }
17250 
17251     /* DELETE */
17252     else if( nArg==1 ){
17253       i64 iDel = sqlite3_value_int64(apVal[0]);  /* Rowid to delete */
17254       rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0);
17255     }
17256 
17257     /* INSERT or UPDATE */
17258     else{
17259       int eType1 = sqlite3_value_numeric_type(apVal[1]);
17260 
17261       if( eType1!=SQLITE_INTEGER && eType1!=SQLITE_NULL ){
17262         rc = SQLITE_MISMATCH;
17263       }
17264 
17265       else if( eType0!=SQLITE_INTEGER ){
17266         /* If this is a REPLACE, first remove the current entry (if any) */
17267         if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){
17268           i64 iNew = sqlite3_value_int64(apVal[1]);  /* Rowid to delete */
17269           rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
17270         }
17271         fts5StorageInsert(&rc, pTab, apVal, pRowid);
17272       }
17273 
17274       /* UPDATE */
17275       else{
17276         i64 iOld = sqlite3_value_int64(apVal[0]);  /* Old rowid */
17277         i64 iNew = sqlite3_value_int64(apVal[1]);  /* New rowid */
17278         if( eType1==SQLITE_INTEGER && iOld!=iNew ){
17279           if( eConflict==SQLITE_REPLACE ){
17280             rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
17281             if( rc==SQLITE_OK ){
17282               rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0);
17283             }
17284             fts5StorageInsert(&rc, pTab, apVal, pRowid);
17285           }else{
17286             rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid);
17287             if( rc==SQLITE_OK ){
17288               rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
17289             }
17290             if( rc==SQLITE_OK ){
17291               rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid);
17292             }
17293           }
17294         }else{
17295           rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0);
17296           fts5StorageInsert(&rc, pTab, apVal, pRowid);
17297         }
17298       }
17299     }
17300   }
17301 
17302   pTab->p.pConfig->pzErrmsg = 0;
17303   return rc;
17304 }
17305 
17306 /*
17307 ** Implementation of xSync() method.
17308 */
17309 static int fts5SyncMethod(sqlite3_vtab *pVtab){
17310   int rc;
17311   Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
17312   fts5CheckTransactionState(pTab, FTS5_SYNC, 0);
17313   pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
17314   fts5TripCursors(pTab);
17315   rc = sqlite3Fts5StorageSync(pTab->pStorage);
17316   pTab->p.pConfig->pzErrmsg = 0;
17317   return rc;
17318 }
17319 
17320 /*
17321 ** Implementation of xBegin() method.
17322 */
17323 static int fts5BeginMethod(sqlite3_vtab *pVtab){
17324   fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0);
17325   fts5NewTransaction((Fts5FullTable*)pVtab);
17326   return SQLITE_OK;
17327 }
17328 
17329 /*
17330 ** Implementation of xCommit() method. This is a no-op. The contents of
17331 ** the pending-terms hash-table have already been flushed into the database
17332 ** by fts5SyncMethod().
17333 */
17334 static int fts5CommitMethod(sqlite3_vtab *pVtab){
17335   UNUSED_PARAM(pVtab);  /* Call below is a no-op for NDEBUG builds */
17336   fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0);
17337   return SQLITE_OK;
17338 }
17339 
17340 /*
17341 ** Implementation of xRollback(). Discard the contents of the pending-terms
17342 ** hash-table. Any changes made to the database are reverted by SQLite.
17343 */
17344 static int fts5RollbackMethod(sqlite3_vtab *pVtab){
17345   int rc;
17346   Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
17347   fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0);
17348   rc = sqlite3Fts5StorageRollback(pTab->pStorage);
17349   return rc;
17350 }
17351 
/* Forward declaration. fts5CsrPoslist() is defined further down in this
** file; the arguments are (cursor, phrase index, OUT: poslist pointer,
** OUT: poslist size in bytes). */
static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*);
17353 
17354 static void *fts5ApiUserData(Fts5Context *pCtx){
17355   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17356   return pCsr->pAux->pUserData;
17357 }
17358 
17359 static int fts5ApiColumnCount(Fts5Context *pCtx){
17360   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17361   return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol;
17362 }
17363 
17364 static int fts5ApiColumnTotalSize(
17365   Fts5Context *pCtx,
17366   int iCol,
17367   sqlite3_int64 *pnToken
17368 ){
17369   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17370   Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17371   return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken);
17372 }
17373 
17374 static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){
17375   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17376   Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17377   return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow);
17378 }
17379 
17380 static int fts5ApiTokenize(
17381   Fts5Context *pCtx,
17382   const char *pText, int nText,
17383   void *pUserData,
17384   int (*xToken)(void*, int, const char*, int, int, int)
17385 ){
17386   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17387   Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
17388   return sqlite3Fts5Tokenize(
17389       pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken
17390   );
17391 }
17392 
17393 static int fts5ApiPhraseCount(Fts5Context *pCtx){
17394   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17395   return sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
17396 }
17397 
17398 static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){
17399   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17400   return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase);
17401 }
17402 
17403 static int fts5ApiColumnText(
17404   Fts5Context *pCtx,
17405   int iCol,
17406   const char **pz,
17407   int *pn
17408 ){
17409   int rc = SQLITE_OK;
17410   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17411   if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab))
17412    || pCsr->ePlan==FTS5_PLAN_SPECIAL
17413   ){
17414     *pz = 0;
17415     *pn = 0;
17416   }else{
17417     rc = fts5SeekCursor(pCsr, 0);
17418     if( rc==SQLITE_OK ){
17419       *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1);
17420       *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
17421     }
17422   }
17423   return rc;
17424 }
17425 
17426 static int fts5CsrPoslist(
17427   Fts5Cursor *pCsr,
17428   int iPhrase,
17429   const u8 **pa,
17430   int *pn
17431 ){
17432   Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
17433   int rc = SQLITE_OK;
17434   int bLive = (pCsr->pSorter==0);
17435 
17436   if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){
17437 
17438     if( pConfig->eDetail!=FTS5_DETAIL_FULL ){
17439       Fts5PoslistPopulator *aPopulator;
17440       int i;
17441       aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive);
17442       if( aPopulator==0 ) rc = SQLITE_NOMEM;
17443       for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){
17444         int n; const char *z;
17445         rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n);
17446         if( rc==SQLITE_OK ){
17447           rc = sqlite3Fts5ExprPopulatePoslists(
17448               pConfig, pCsr->pExpr, aPopulator, i, z, n
17449           );
17450         }
17451       }
17452       sqlite3_free(aPopulator);
17453 
17454       if( pCsr->pSorter ){
17455         sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid);
17456       }
17457     }
17458     CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST);
17459   }
17460 
17461   if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){
17462     Fts5Sorter *pSorter = pCsr->pSorter;
17463     int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
17464     *pn = pSorter->aIdx[iPhrase] - i1;
17465     *pa = &pSorter->aPoslist[i1];
17466   }else{
17467     *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa);
17468   }
17469 
17470   return rc;
17471 }
17472 
/*
** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated
** from the position lists that fts5CsrPoslist() currently returns for each
** phrase. Return SQLITE_OK if successful, or an SQLite error code otherwise.
**
** aInst[] is a flat array of integer triples, one triple per phrase
** instance: (phrase index, column, offset). The instances are produced by
** merging one position-list reader per phrase, always taking the reader
** with the smallest current position next.
**
** The per-phrase reader array (pCsr->aInstIter) is allocated on first use
** and reused on later calls. Provided that array is available,
** pCsr->nInstCount is updated and the FTS5CSR_REQUIRE_INST flag is cleared
** before returning, even if an error occurred part way through.
*/
static int fts5CacheInstArray(Fts5Cursor *pCsr){
  int rc = SQLITE_OK;
  Fts5PoslistReader *aIter;       /* One iterator for each phrase */
  int nIter;                      /* Number of iterators/phrases */
  int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol;

  nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
  if( pCsr->aInstIter==0 ){
    /* First call for this cursor: allocate zeroed space for the readers. */
    sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter;
    pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
  }
  aIter = pCsr->aInstIter;

  if( aIter ){
    int nInst = 0;                /* Number instances seen so far */
    int i;

    /* Initialize all iterators */
    for(i=0; i<nIter && rc==SQLITE_OK; i++){
      const u8 *a;
      int n;
      rc = fts5CsrPoslist(pCsr, i, &a, &n);
      if( rc==SQLITE_OK ){
        sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]);
      }
    }

    if( rc==SQLITE_OK ){
      while( 1 ){
        int *aInst;
        int iBest = -1;

        /* Find the reader that is not at EOF and has the smallest iPos.
        ** The comparison is strict, so on a tie the lower phrase index
        ** is kept. */
        for(i=0; i<nIter; i++){
          if( (aIter[i].bEof==0)
              && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos)
            ){
            iBest = i;
          }
        }
        if( iBest<0 ) break;      /* All readers are at EOF */

        nInst++;
        if( nInst>=pCsr->nInstAlloc ){
          /* Grow aInst[]: start at 32 entries, then double. Each entry is
          ** three ints. Growth is triggered as soon as the count reaches
          ** the allocated size. */
          int nNewSize = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32;
          aInst = (int*)sqlite3_realloc64(
              pCsr->aInst, nNewSize*sizeof(int)*3
              );
          if( aInst ){
            pCsr->aInst = aInst;
            pCsr->nInstAlloc = nNewSize;
          }else{
            /* Allocation failed: pCsr->aInst is still the old buffer.
            ** Undo the increment so nInst counts only stored entries. */
            nInst--;
            rc = SQLITE_NOMEM;
            break;
          }
        }

        /* Store the (phrase, column, offset) triple for this instance. */
        aInst = &pCsr->aInst[3 * (nInst-1)];
        aInst[0] = iBest;
        aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
        aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
        if( aInst[1]<0 || aInst[1]>=nCol ){
          /* Column number outside the table. Note that this entry has
          ** already been counted in nInst at this point. */
          rc = FTS5_CORRUPT;
          break;
        }
        sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
      }
    }

    pCsr->nInstCount = nInst;
    CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST);
  }
  return rc;
}
17551 
17552 static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){
17553   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17554   int rc = SQLITE_OK;
17555   if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0
17556    || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){
17557     *pnInst = pCsr->nInstCount;
17558   }
17559   return rc;
17560 }
17561 
17562 static int fts5ApiInst(
17563   Fts5Context *pCtx,
17564   int iIdx,
17565   int *piPhrase,
17566   int *piCol,
17567   int *piOff
17568 ){
17569   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17570   int rc = SQLITE_OK;
17571   if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0
17572    || SQLITE_OK==(rc = fts5CacheInstArray(pCsr))
17573   ){
17574     if( iIdx<0 || iIdx>=pCsr->nInstCount ){
17575       rc = SQLITE_RANGE;
17576 #if 0
17577     }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){
17578       *piPhrase = pCsr->aInst[iIdx*3];
17579       *piCol = pCsr->aInst[iIdx*3 + 2];
17580       *piOff = -1;
17581 #endif
17582     }else{
17583       *piPhrase = pCsr->aInst[iIdx*3];
17584       *piCol = pCsr->aInst[iIdx*3 + 1];
17585       *piOff = pCsr->aInst[iIdx*3 + 2];
17586     }
17587   }
17588   return rc;
17589 }
17590 
17591 static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){
17592   return fts5CursorRowid((Fts5Cursor*)pCtx);
17593 }
17594 
17595 static int fts5ColumnSizeCb(
17596   void *pContext,                 /* Pointer to int */
17597   int tflags,
17598   const char *pUnused,            /* Buffer containing token */
17599   int nUnused,                    /* Size of token in bytes */
17600   int iUnused1,                   /* Start offset of token */
17601   int iUnused2                    /* End offset of token */
17602 ){
17603   int *pCnt = (int*)pContext;
17604   UNUSED_PARAM2(pUnused, nUnused);
17605   UNUSED_PARAM2(iUnused1, iUnused2);
17606   if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
17607     (*pCnt)++;
17608   }
17609   return SQLITE_OK;
17610 }
17611 
17612 static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){
17613   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17614   Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17615   Fts5Config *pConfig = pTab->p.pConfig;
17616   int rc = SQLITE_OK;
17617 
17618   if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){
17619     if( pConfig->bColumnsize ){
17620       i64 iRowid = fts5CursorRowid(pCsr);
17621       rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize);
17622     }else if( pConfig->zContent==0 ){
17623       int i;
17624       for(i=0; i<pConfig->nCol; i++){
17625         if( pConfig->abUnindexed[i]==0 ){
17626           pCsr->aColumnSize[i] = -1;
17627         }
17628       }
17629     }else{
17630       int i;
17631       for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
17632         if( pConfig->abUnindexed[i]==0 ){
17633           const char *z; int n;
17634           void *p = (void*)(&pCsr->aColumnSize[i]);
17635           pCsr->aColumnSize[i] = 0;
17636           rc = fts5ApiColumnText(pCtx, i, &z, &n);
17637           if( rc==SQLITE_OK ){
17638             rc = sqlite3Fts5Tokenize(
17639                 pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb
17640             );
17641           }
17642         }
17643       }
17644     }
17645     CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE);
17646   }
17647   if( iCol<0 ){
17648     int i;
17649     *pnToken = 0;
17650     for(i=0; i<pConfig->nCol; i++){
17651       *pnToken += pCsr->aColumnSize[i];
17652     }
17653   }else if( iCol<pConfig->nCol ){
17654     *pnToken = pCsr->aColumnSize[iCol];
17655   }else{
17656     *pnToken = 0;
17657     rc = SQLITE_RANGE;
17658   }
17659   return rc;
17660 }
17661 
17662 /*
17663 ** Implementation of the xSetAuxdata() method.
17664 */
17665 static int fts5ApiSetAuxdata(
17666   Fts5Context *pCtx,              /* Fts5 context */
17667   void *pPtr,                     /* Pointer to save as auxdata */
17668   void(*xDelete)(void*)           /* Destructor for pPtr (or NULL) */
17669 ){
17670   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17671   Fts5Auxdata *pData;
17672 
17673   /* Search through the cursors list of Fts5Auxdata objects for one that
17674   ** corresponds to the currently executing auxiliary function.  */
17675   for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
17676     if( pData->pAux==pCsr->pAux ) break;
17677   }
17678 
17679   if( pData ){
17680     if( pData->xDelete ){
17681       pData->xDelete(pData->pPtr);
17682     }
17683   }else{
17684     int rc = SQLITE_OK;
17685     pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata));
17686     if( pData==0 ){
17687       if( xDelete ) xDelete(pPtr);
17688       return rc;
17689     }
17690     pData->pAux = pCsr->pAux;
17691     pData->pNext = pCsr->pAuxdata;
17692     pCsr->pAuxdata = pData;
17693   }
17694 
17695   pData->xDelete = xDelete;
17696   pData->pPtr = pPtr;
17697   return SQLITE_OK;
17698 }
17699 
17700 static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
17701   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17702   Fts5Auxdata *pData;
17703   void *pRet = 0;
17704 
17705   for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){
17706     if( pData->pAux==pCsr->pAux ) break;
17707   }
17708 
17709   if( pData ){
17710     pRet = pData->pPtr;
17711     if( bClear ){
17712       pData->pPtr = 0;
17713       pData->xDelete = 0;
17714     }
17715   }
17716 
17717   return pRet;
17718 }
17719 
17720 static void fts5ApiPhraseNext(
17721   Fts5Context *pUnused,
17722   Fts5PhraseIter *pIter,
17723   int *piCol, int *piOff
17724 ){
17725   UNUSED_PARAM(pUnused);
17726   if( pIter->a>=pIter->b ){
17727     *piCol = -1;
17728     *piOff = -1;
17729   }else{
17730     int iVal;
17731     pIter->a += fts5GetVarint32(pIter->a, iVal);
17732     if( iVal==1 ){
17733       pIter->a += fts5GetVarint32(pIter->a, iVal);
17734       *piCol = iVal;
17735       *piOff = 0;
17736       pIter->a += fts5GetVarint32(pIter->a, iVal);
17737     }
17738     *piOff += (iVal-2);
17739   }
17740 }
17741 
17742 static int fts5ApiPhraseFirst(
17743   Fts5Context *pCtx,
17744   int iPhrase,
17745   Fts5PhraseIter *pIter,
17746   int *piCol, int *piOff
17747 ){
17748   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17749   int n;
17750   int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
17751   if( rc==SQLITE_OK ){
17752     assert( pIter->a || n==0 );
17753     pIter->b = (pIter->a ? &pIter->a[n] : 0);
17754     *piCol = 0;
17755     *piOff = 0;
17756     fts5ApiPhraseNext(pCtx, pIter, piCol, piOff);
17757   }
17758   return rc;
17759 }
17760 
/*
** Extension API: advance column iterator pIter to the next column that
** contains the phrase, storing the column number in (*piCol), or -1 once
** the end of the list (pIter->b) has been reached.
**
** The encoding read depends on the table's detail setting:
**
**   * FTS5_DETAIL_COLUMNS: the data is a list of varints. Each decoded
**     value, minus 2, is added to the current value of (*piCol).
**
**   * otherwise: the data is a position list. Varints are skipped until a
**     0x01 byte is found; the varint that follows it is the new column
**     number.
*/
static void fts5ApiPhraseNextColumn(
  Fts5Context *pCtx,
  Fts5PhraseIter *pIter,
  int *piCol
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;

  if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
    if( pIter->a>=pIter->b ){
      *piCol = -1;
    }else{
      int iIncr;
      pIter->a += fts5GetVarint32(&pIter->a[0], iIncr);
      *piCol += (iIncr-2);
    }
  }else{
    /* Skip the remaining position entries of the current column. */
    while( 1 ){
      int dummy;
      if( pIter->a>=pIter->b ){
        *piCol = -1;
        return;
      }
      if( pIter->a[0]==0x01 ) break;
      pIter->a += fts5GetVarint32(pIter->a, dummy);
    }
    /* Step over the 0x01 byte and the column-number varint after it. */
    pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
  }
}
17790 
17791 static int fts5ApiPhraseFirstColumn(
17792   Fts5Context *pCtx,
17793   int iPhrase,
17794   Fts5PhraseIter *pIter,
17795   int *piCol
17796 ){
17797   int rc = SQLITE_OK;
17798   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17799   Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig;
17800 
17801   if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
17802     Fts5Sorter *pSorter = pCsr->pSorter;
17803     int n;
17804     if( pSorter ){
17805       int i1 = (iPhrase==0 ? 0 : pSorter->aIdx[iPhrase-1]);
17806       n = pSorter->aIdx[iPhrase] - i1;
17807       pIter->a = &pSorter->aPoslist[i1];
17808     }else{
17809       rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n);
17810     }
17811     if( rc==SQLITE_OK ){
17812       assert( pIter->a || n==0 );
17813       pIter->b = (pIter->a ? &pIter->a[n] : 0);
17814       *piCol = 0;
17815       fts5ApiPhraseNextColumn(pCtx, pIter, piCol);
17816     }
17817   }else{
17818     int n;
17819     rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n);
17820     if( rc==SQLITE_OK ){
17821       assert( pIter->a || n==0 );
17822       pIter->b = (pIter->a ? &pIter->a[n] : 0);
17823       if( n<=0 ){
17824         *piCol = -1;
17825       }else if( pIter->a[0]==0x01 ){
17826         pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol);
17827       }else{
17828         *piCol = 0;
17829       }
17830     }
17831   }
17832 
17833   return rc;
17834 }
17835 
17836 
/* Forward declaration. fts5ApiQueryPhrase() is listed in the sFts5Api table
** below, but its definition also refers to sFts5Api, so it has to be
** declared before the table and defined after it. */
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
    int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);

/*
** The extension API method table. A pointer to this object is passed as the
** first argument to auxiliary functions (see fts5ApiInvoke()) and to
** xQueryPhrase callbacks (see fts5ApiQueryPhrase()).
**
** The initializer is positional, so the order of the entries must match the
** order in which the members of Fts5ExtensionApi are declared.
*/
static const Fts5ExtensionApi sFts5Api = {
  2,                            /* iVersion */
  fts5ApiUserData,
  fts5ApiColumnCount,
  fts5ApiRowCount,
  fts5ApiColumnTotalSize,
  fts5ApiTokenize,
  fts5ApiPhraseCount,
  fts5ApiPhraseSize,
  fts5ApiInstCount,
  fts5ApiInst,
  fts5ApiRowid,
  fts5ApiColumnText,
  fts5ApiColumnSize,
  fts5ApiQueryPhrase,
  fts5ApiSetAuxdata,
  fts5ApiGetAuxdata,
  fts5ApiPhraseFirst,
  fts5ApiPhraseNext,
  fts5ApiPhraseFirstColumn,
  fts5ApiPhraseNextColumn,
};
17863 
17864 /*
17865 ** Implementation of API function xQueryPhrase().
17866 */
17867 static int fts5ApiQueryPhrase(
17868   Fts5Context *pCtx,
17869   int iPhrase,
17870   void *pUserData,
17871   int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*)
17872 ){
17873   Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
17874   Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab);
17875   int rc;
17876   Fts5Cursor *pNew = 0;
17877 
17878   rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
17879   if( rc==SQLITE_OK ){
17880     pNew->ePlan = FTS5_PLAN_MATCH;
17881     pNew->iFirstRowid = SMALLEST_INT64;
17882     pNew->iLastRowid = LARGEST_INT64;
17883     pNew->base.pVtab = (sqlite3_vtab*)pTab;
17884     rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr);
17885   }
17886 
17887   if( rc==SQLITE_OK ){
17888     for(rc = fts5CursorFirst(pTab, pNew, 0);
17889         rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
17890         rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
17891     ){
17892       rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData);
17893       if( rc!=SQLITE_OK ){
17894         if( rc==SQLITE_DONE ) rc = SQLITE_OK;
17895         break;
17896       }
17897     }
17898   }
17899 
17900   fts5CloseMethod((sqlite3_vtab_cursor*)pNew);
17901   return rc;
17902 }
17903 
17904 static void fts5ApiInvoke(
17905   Fts5Auxiliary *pAux,
17906   Fts5Cursor *pCsr,
17907   sqlite3_context *context,
17908   int argc,
17909   sqlite3_value **argv
17910 ){
17911   assert( pCsr->pAux==0 );
17912   pCsr->pAux = pAux;
17913   pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv);
17914   pCsr->pAux = 0;
17915 }
17916 
17917 static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){
17918   Fts5Cursor *pCsr;
17919   for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){
17920     if( pCsr->iCsrId==iCsrId ) break;
17921   }
17922   return pCsr;
17923 }
17924 
17925 static void fts5ApiCallback(
17926   sqlite3_context *context,
17927   int argc,
17928   sqlite3_value **argv
17929 ){
17930 
17931   Fts5Auxiliary *pAux;
17932   Fts5Cursor *pCsr;
17933   i64 iCsrId;
17934 
17935   assert( argc>=1 );
17936   pAux = (Fts5Auxiliary*)sqlite3_user_data(context);
17937   iCsrId = sqlite3_value_int64(argv[0]);
17938 
17939   pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId);
17940   if( pCsr==0 || pCsr->ePlan==0 ){
17941     char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId);
17942     sqlite3_result_error(context, zErr, -1);
17943     sqlite3_free(zErr);
17944   }else{
17945     fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]);
17946   }
17947 }
17948 
17949 
17950 /*
17951 ** Given cursor id iId, return a pointer to the corresponding Fts5Table
17952 ** object. Or NULL If the cursor id does not exist.
17953 */
17954 static Fts5Table *sqlite3Fts5TableFromCsrid(
17955   Fts5Global *pGlobal,            /* FTS5 global context for db handle */
17956   i64 iCsrId                      /* Id of cursor to find */
17957 ){
17958   Fts5Cursor *pCsr;
17959   pCsr = fts5CursorFromCsrid(pGlobal, iCsrId);
17960   if( pCsr ){
17961     return (Fts5Table*)pCsr->base.pVtab;
17962   }
17963   return 0;
17964 }
17965 
17966 /*
17967 ** Return a "position-list blob" corresponding to the current position of
17968 ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains
17969 ** the current position-list for each phrase in the query associated with
17970 ** cursor pCsr.
17971 **
17972 ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is
17973 ** the number of phrases in the query. Following the varints are the
17974 ** concatenated position lists for each phrase, in order.
17975 **
17976 ** The first varint (if it exists) contains the size of the position list
17977 ** for phrase 0. The second (same disclaimer) contains the size of position
17978 ** list 1. And so on. There is no size field for the final position list,
17979 ** as it can be derived from the total size of the blob.
17980 */
17981 static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){
17982   int i;
17983   int rc = SQLITE_OK;
17984   int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
17985   Fts5Buffer val;
17986 
17987   memset(&val, 0, sizeof(Fts5Buffer));
17988   switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){
17989     case FTS5_DETAIL_FULL:
17990 
17991       /* Append the varints */
17992       for(i=0; i<(nPhrase-1); i++){
17993         const u8 *dummy;
17994         int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy);
17995         sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
17996       }
17997 
17998       /* Append the position lists */
17999       for(i=0; i<nPhrase; i++){
18000         const u8 *pPoslist;
18001         int nPoslist;
18002         nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist);
18003         sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
18004       }
18005       break;
18006 
18007     case FTS5_DETAIL_COLUMNS:
18008 
18009       /* Append the varints */
18010       for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){
18011         const u8 *dummy;
18012         int nByte;
18013         rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte);
18014         sqlite3Fts5BufferAppendVarint(&rc, &val, nByte);
18015       }
18016 
18017       /* Append the position lists */
18018       for(i=0; rc==SQLITE_OK && i<nPhrase; i++){
18019         const u8 *pPoslist;
18020         int nPoslist;
18021         rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist);
18022         sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist);
18023       }
18024       break;
18025 
18026     default:
18027       break;
18028   }
18029 
18030   sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free);
18031   return rc;
18032 }
18033 
18034 /*
18035 ** This is the xColumn method, called by SQLite to request a value from
18036 ** the row that the supplied cursor currently points to.
18037 */
18038 static int fts5ColumnMethod(
18039   sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
18040   sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
18041   int iCol                        /* Index of column to read value from */
18042 ){
18043   Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab);
18044   Fts5Config *pConfig = pTab->p.pConfig;
18045   Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
18046   int rc = SQLITE_OK;
18047 
18048   assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 );
18049 
18050   if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){
18051     if( iCol==pConfig->nCol ){
18052       sqlite3_result_int64(pCtx, pCsr->iSpecial);
18053     }
18054   }else
18055 
18056   if( iCol==pConfig->nCol ){
18057     /* User is requesting the value of the special column with the same name
18058     ** as the table. Return the cursor integer id number. This value is only
18059     ** useful in that it may be passed as the first argument to an FTS5
18060     ** auxiliary function.  */
18061     sqlite3_result_int64(pCtx, pCsr->iCsrId);
18062   }else if( iCol==pConfig->nCol+1 ){
18063 
18064     /* The value of the "rank" column. */
18065     if( pCsr->ePlan==FTS5_PLAN_SOURCE ){
18066       fts5PoslistBlob(pCtx, pCsr);
18067     }else if(
18068         pCsr->ePlan==FTS5_PLAN_MATCH
18069      || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH
18070     ){
18071       if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){
18072         fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg);
18073       }
18074     }
18075   }else if( !fts5IsContentless(pTab) ){
18076     pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
18077     rc = fts5SeekCursor(pCsr, 1);
18078     if( rc==SQLITE_OK ){
18079       sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
18080     }
18081     pConfig->pzErrmsg = 0;
18082   }
18083   return rc;
18084 }
18085 
18086 
18087 /*
18088 ** This routine implements the xFindFunction method for the FTS3
18089 ** virtual table.
18090 */
18091 static int fts5FindFunctionMethod(
18092   sqlite3_vtab *pVtab,            /* Virtual table handle */
18093   int nUnused,                    /* Number of SQL function arguments */
18094   const char *zName,              /* Name of SQL function */
18095   void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */
18096   void **ppArg                    /* OUT: User data for *pxFunc */
18097 ){
18098   Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
18099   Fts5Auxiliary *pAux;
18100 
18101   UNUSED_PARAM(nUnused);
18102   pAux = fts5FindAuxiliary(pTab, zName);
18103   if( pAux ){
18104     *pxFunc = fts5ApiCallback;
18105     *ppArg = (void*)pAux;
18106     return 1;
18107   }
18108 
18109   /* No function of the specified name was found. Return 0. */
18110   return 0;
18111 }
18112 
18113 /*
18114 ** Implementation of FTS5 xRename method. Rename an fts5 table.
18115 */
18116 static int fts5RenameMethod(
18117   sqlite3_vtab *pVtab,            /* Virtual table handle */
18118   const char *zName               /* New name of table */
18119 ){
18120   Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
18121   return sqlite3Fts5StorageRename(pTab->pStorage, zName);
18122 }
18123 
18124 static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){
18125   fts5TripCursors((Fts5FullTable*)pTab);
18126   return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage);
18127 }
18128 
18129 /*
18130 ** The xSavepoint() method.
18131 **
18132 ** Flush the contents of the pending-terms table to disk.
18133 */
18134 static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
18135   UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
18136   fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_SAVEPOINT, iSavepoint);
18137   return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab);
18138 }
18139 
18140 /*
18141 ** The xRelease() method.
18142 **
18143 ** This is a no-op.
18144 */
18145 static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
18146   UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
18147   fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_RELEASE, iSavepoint);
18148   return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab);
18149 }
18150 
18151 /*
18152 ** The xRollbackTo() method.
18153 **
18154 ** Discard the contents of the pending terms table.
18155 */
18156 static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
18157   Fts5FullTable *pTab = (Fts5FullTable*)pVtab;
18158   UNUSED_PARAM(iSavepoint);  /* Call below is a no-op for NDEBUG builds */
18159   fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint);
18160   fts5TripCursors(pTab);
18161   return sqlite3Fts5StorageRollback(pTab->pStorage);
18162 }
18163 
18164 /*
18165 ** Register a new auxiliary function with global context pGlobal.
18166 */
18167 static int fts5CreateAux(
18168   fts5_api *pApi,                 /* Global context (one per db handle) */
18169   const char *zName,              /* Name of new function */
18170   void *pUserData,                /* User data for aux. function */
18171   fts5_extension_function xFunc,  /* Aux. function implementation */
18172   void(*xDestroy)(void*)          /* Destructor for pUserData */
18173 ){
18174   Fts5Global *pGlobal = (Fts5Global*)pApi;
18175   int rc = sqlite3_overload_function(pGlobal->db, zName, -1);
18176   if( rc==SQLITE_OK ){
18177     Fts5Auxiliary *pAux;
18178     sqlite3_int64 nName;            /* Size of zName in bytes, including \0 */
18179     sqlite3_int64 nByte;            /* Bytes of space to allocate */
18180 
18181     nName = strlen(zName) + 1;
18182     nByte = sizeof(Fts5Auxiliary) + nName;
18183     pAux = (Fts5Auxiliary*)sqlite3_malloc64(nByte);
18184     if( pAux ){
18185       memset(pAux, 0, (size_t)nByte);
18186       pAux->zFunc = (char*)&pAux[1];
18187       memcpy(pAux->zFunc, zName, nName);
18188       pAux->pGlobal = pGlobal;
18189       pAux->pUserData = pUserData;
18190       pAux->xFunc = xFunc;
18191       pAux->xDestroy = xDestroy;
18192       pAux->pNext = pGlobal->pAux;
18193       pGlobal->pAux = pAux;
18194     }else{
18195       rc = SQLITE_NOMEM;
18196     }
18197   }
18198 
18199   return rc;
18200 }
18201 
18202 /*
18203 ** Register a new tokenizer. This is the implementation of the
18204 ** fts5_api.xCreateTokenizer() method.
18205 */
18206 static int fts5CreateTokenizer(
18207   fts5_api *pApi,                 /* Global context (one per db handle) */
18208   const char *zName,              /* Name of new function */
18209   void *pUserData,                /* User data for aux. function */
18210   fts5_tokenizer *pTokenizer,     /* Tokenizer implementation */
18211   void(*xDestroy)(void*)          /* Destructor for pUserData */
18212 ){
18213   Fts5Global *pGlobal = (Fts5Global*)pApi;
18214   Fts5TokenizerModule *pNew;
18215   sqlite3_int64 nName;            /* Size of zName and its \0 terminator */
18216   sqlite3_int64 nByte;            /* Bytes of space to allocate */
18217   int rc = SQLITE_OK;
18218 
18219   nName = strlen(zName) + 1;
18220   nByte = sizeof(Fts5TokenizerModule) + nName;
18221   pNew = (Fts5TokenizerModule*)sqlite3_malloc64(nByte);
18222   if( pNew ){
18223     memset(pNew, 0, (size_t)nByte);
18224     pNew->zName = (char*)&pNew[1];
18225     memcpy(pNew->zName, zName, nName);
18226     pNew->pUserData = pUserData;
18227     pNew->x = *pTokenizer;
18228     pNew->xDestroy = xDestroy;
18229     pNew->pNext = pGlobal->pTok;
18230     pGlobal->pTok = pNew;
18231     if( pNew->pNext==0 ){
18232       pGlobal->pDfltTok = pNew;
18233     }
18234   }else{
18235     rc = SQLITE_NOMEM;
18236   }
18237 
18238   return rc;
18239 }
18240 
18241 static Fts5TokenizerModule *fts5LocateTokenizer(
18242   Fts5Global *pGlobal,
18243   const char *zName
18244 ){
18245   Fts5TokenizerModule *pMod = 0;
18246 
18247   if( zName==0 ){
18248     pMod = pGlobal->pDfltTok;
18249   }else{
18250     for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){
18251       if( sqlite3_stricmp(zName, pMod->zName)==0 ) break;
18252     }
18253   }
18254 
18255   return pMod;
18256 }
18257 
18258 /*
18259 ** Find a tokenizer. This is the implementation of the
18260 ** fts5_api.xFindTokenizer() method.
18261 */
18262 static int fts5FindTokenizer(
18263   fts5_api *pApi,                 /* Global context (one per db handle) */
18264   const char *zName,              /* Name of new function */
18265   void **ppUserData,
18266   fts5_tokenizer *pTokenizer      /* Populate this object */
18267 ){
18268   int rc = SQLITE_OK;
18269   Fts5TokenizerModule *pMod;
18270 
18271   pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
18272   if( pMod ){
18273     *pTokenizer = pMod->x;
18274     *ppUserData = pMod->pUserData;
18275   }else{
18276     memset(pTokenizer, 0, sizeof(fts5_tokenizer));
18277     rc = SQLITE_ERROR;
18278   }
18279 
18280   return rc;
18281 }
18282 
18283 static int sqlite3Fts5GetTokenizer(
18284   Fts5Global *pGlobal,
18285   const char **azArg,
18286   int nArg,
18287   Fts5Config *pConfig,
18288   char **pzErr
18289 ){
18290   Fts5TokenizerModule *pMod;
18291   int rc = SQLITE_OK;
18292 
18293   pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]);
18294   if( pMod==0 ){
18295     assert( nArg>0 );
18296     rc = SQLITE_ERROR;
18297     *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]);
18298   }else{
18299     rc = pMod->x.xCreate(
18300         pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok
18301     );
18302     pConfig->pTokApi = &pMod->x;
18303     if( rc!=SQLITE_OK ){
18304       if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor");
18305     }else{
18306       pConfig->ePattern = sqlite3Fts5TokenizerPattern(
18307           pMod->x.xCreate, pConfig->pTok
18308       );
18309     }
18310   }
18311 
18312   if( rc!=SQLITE_OK ){
18313     pConfig->pTokApi = 0;
18314     pConfig->pTok = 0;
18315   }
18316 
18317   return rc;
18318 }
18319 
18320 static void fts5ModuleDestroy(void *pCtx){
18321   Fts5TokenizerModule *pTok, *pNextTok;
18322   Fts5Auxiliary *pAux, *pNextAux;
18323   Fts5Global *pGlobal = (Fts5Global*)pCtx;
18324 
18325   for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){
18326     pNextAux = pAux->pNext;
18327     if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData);
18328     sqlite3_free(pAux);
18329   }
18330 
18331   for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){
18332     pNextTok = pTok->pNext;
18333     if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData);
18334     sqlite3_free(pTok);
18335   }
18336 
18337   sqlite3_free(pGlobal);
18338 }
18339 
18340 static void fts5Fts5Func(
18341   sqlite3_context *pCtx,          /* Function call context */
18342   int nArg,                       /* Number of args */
18343   sqlite3_value **apArg           /* Function arguments */
18344 ){
18345   Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx);
18346   fts5_api **ppApi;
18347   UNUSED_PARAM(nArg);
18348   assert( nArg==1 );
18349   ppApi = (fts5_api**)sqlite3_value_pointer(apArg[0], "fts5_api_ptr");
18350   if( ppApi ) *ppApi = &pGlobal->api;
18351 }
18352 
18353 /*
18354 ** Implementation of fts5_source_id() function.
18355 */
18356 static void fts5SourceIdFunc(
18357   sqlite3_context *pCtx,          /* Function call context */
18358   int nArg,                       /* Number of args */
18359   sqlite3_value **apUnused        /* Function arguments */
18360 ){
18361   assert( nArg==0 );
18362   UNUSED_PARAM2(nArg, apUnused);
18363   sqlite3_result_text(pCtx, "fts5: 2022-11-16 12:10:08 89c459e766ea7e9165d0beeb124708b955a4950d0f4792f457465d71b158d318", -1, SQLITE_TRANSIENT);
18364 }
18365 
18366 /*
18367 ** Return true if zName is the extension on one of the shadow tables used
18368 ** by this module.
18369 */
static int fts5ShadowName(const char *zName){
  /* Suffixes of the shadow tables, compared using sqlite3_stricmp() */
  static const char *azName[] = {
    "config", "content", "data", "docsize", "idx"
  };
  const unsigned int nName = sizeof(azName)/sizeof(azName[0]);
  unsigned int ii = 0;
  int bMatch = 0;

  while( bMatch==0 && ii<nName ){
    bMatch = (sqlite3_stricmp(zName, azName[ii])==0);
    ii++;
  }
  return bMatch;
}
18380 
18381 static int fts5Init(sqlite3 *db){
18382   static const sqlite3_module fts5Mod = {
18383     /* iVersion      */ 3,
18384     /* xCreate       */ fts5CreateMethod,
18385     /* xConnect      */ fts5ConnectMethod,
18386     /* xBestIndex    */ fts5BestIndexMethod,
18387     /* xDisconnect   */ fts5DisconnectMethod,
18388     /* xDestroy      */ fts5DestroyMethod,
18389     /* xOpen         */ fts5OpenMethod,
18390     /* xClose        */ fts5CloseMethod,
18391     /* xFilter       */ fts5FilterMethod,
18392     /* xNext         */ fts5NextMethod,
18393     /* xEof          */ fts5EofMethod,
18394     /* xColumn       */ fts5ColumnMethod,
18395     /* xRowid        */ fts5RowidMethod,
18396     /* xUpdate       */ fts5UpdateMethod,
18397     /* xBegin        */ fts5BeginMethod,
18398     /* xSync         */ fts5SyncMethod,
18399     /* xCommit       */ fts5CommitMethod,
18400     /* xRollback     */ fts5RollbackMethod,
18401     /* xFindFunction */ fts5FindFunctionMethod,
18402     /* xRename       */ fts5RenameMethod,
18403     /* xSavepoint    */ fts5SavepointMethod,
18404     /* xRelease      */ fts5ReleaseMethod,
18405     /* xRollbackTo   */ fts5RollbackToMethod,
18406     /* xShadowName   */ fts5ShadowName
18407   };
18408 
18409   int rc;
18410   Fts5Global *pGlobal = 0;
18411 
18412   pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global));
18413   if( pGlobal==0 ){
18414     rc = SQLITE_NOMEM;
18415   }else{
18416     void *p = (void*)pGlobal;
18417     memset(pGlobal, 0, sizeof(Fts5Global));
18418     pGlobal->db = db;
18419     pGlobal->api.iVersion = 2;
18420     pGlobal->api.xCreateFunction = fts5CreateAux;
18421     pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
18422     pGlobal->api.xFindTokenizer = fts5FindTokenizer;
18423     rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
18424     if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
18425     if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
18426     if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api);
18427     if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api);
18428     if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db);
18429     if( rc==SQLITE_OK ){
18430       rc = sqlite3_create_function(
18431           db, "fts5", 1, SQLITE_UTF8, p, fts5Fts5Func, 0, 0
18432       );
18433     }
18434     if( rc==SQLITE_OK ){
18435       rc = sqlite3_create_function(
18436           db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0
18437       );
18438     }
18439   }
18440 
18441   /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
18442   ** fts5_test_mi.c is compiled and linked into the executable. And call
18443   ** its entry point to enable the matchinfo() demo.  */
18444 #ifdef SQLITE_FTS5_ENABLE_TEST_MI
18445   if( rc==SQLITE_OK ){
18446     extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*);
18447     rc = sqlite3Fts5TestRegisterMatchinfo(db);
18448   }
18449 #endif
18450 
18451   return rc;
18452 }
18453 
18454 /*
18455 ** The following functions are used to register the module with SQLite. If
18456 ** this module is being built as part of the SQLite core (SQLITE_CORE is
18457 ** defined), then sqlite3_open() will call sqlite3Fts5Init() directly.
18458 **
18459 ** Or, if this module is being built as a loadable extension,
18460 ** sqlite3Fts5Init() is omitted and the two standard entry points
18461 ** sqlite3_fts_init() and sqlite3_fts5_init() defined instead.
18462 */
18463 #ifndef SQLITE_CORE
18464 #ifdef _WIN32
18465 __declspec(dllexport)
18466 #endif
18467 int sqlite3_fts_init(
18468   sqlite3 *db,
18469   char **pzErrMsg,
18470   const sqlite3_api_routines *pApi
18471 ){
18472   SQLITE_EXTENSION_INIT2(pApi);
18473   (void)pzErrMsg;  /* Unused parameter */
18474   return fts5Init(db);
18475 }
18476 
18477 #ifdef _WIN32
18478 __declspec(dllexport)
18479 #endif
18480 int sqlite3_fts5_init(
18481   sqlite3 *db,
18482   char **pzErrMsg,
18483   const sqlite3_api_routines *pApi
18484 ){
18485   SQLITE_EXTENSION_INIT2(pApi);
18486   (void)pzErrMsg;  /* Unused parameter */
18487   return fts5Init(db);
18488 }
18489 #else
18490 int sqlite3Fts5Init(sqlite3 *db){
18491   return fts5Init(db);
18492 }
18493 #endif
18494 
18495 #line 1 "fts5_storage.c"
18496 /*
18497 ** 2014 May 31
18498 **
18499 ** The author disclaims copyright to this source code.  In place of
18500 ** a legal notice, here is a blessing:
18501 **
18502 **    May you do good and not evil.
18503 **    May you find forgiveness for yourself and forgive others.
18504 **    May you share freely, never taking more than you give.
18505 **
18506 ******************************************************************************
18507 **
18508 */
18509 
18510 
18511 
18512 /* #include "fts5Int.h" */
18513 
/*
** Storage handle. sqlite3Fts5StorageOpen() allocates this object together
** with the aTotalSize[] array in a single allocation, the array following
** immediately after the structure.
*/
struct Fts5Storage {
  Fts5Config *pConfig;            /* Configuration of the FTS5 table */
  Fts5Index *pIndex;              /* Index handle that writes are sent to */
  int bTotalsValid;               /* True if nTotalRow/aTotalSize[] are valid */
  i64 nTotalRow;                  /* Total number of rows in FTS table */
  i64 *aTotalSize;                /* Total sizes of each column */
  sqlite3_stmt *aStmt[11];        /* Cached statements, by FTS5_STMT_XXX */
};
18522 
18523 
/*
** The FTS5_STMT_XXX values are used as indexes into Fts5Storage.aStmt[]
** and into the azStmt[] array of SQL templates in fts5StorageGetStmt().
** The first three are defined outside this file; the checks below fail
** the build if their values are not 0, 1 and 2.
*/
#if FTS5_STMT_SCAN_ASC!=0
# error "FTS5_STMT_SCAN_ASC mismatch"
#endif
#if FTS5_STMT_SCAN_DESC!=1
# error "FTS5_STMT_SCAN_DESC mismatch"
#endif
#if FTS5_STMT_LOOKUP!=2
# error "FTS5_STMT_LOOKUP mismatch"
#endif

/* Remaining statement ids, continuing the numbering from 3 */
#define FTS5_STMT_INSERT_CONTENT  3
#define FTS5_STMT_REPLACE_CONTENT 4
#define FTS5_STMT_DELETE_CONTENT  5
#define FTS5_STMT_REPLACE_DOCSIZE  6
#define FTS5_STMT_DELETE_DOCSIZE  7
#define FTS5_STMT_LOOKUP_DOCSIZE  8
#define FTS5_STMT_REPLACE_CONFIG 9
#define FTS5_STMT_SCAN 10
18542 
18543 /*
18544 ** Prepare the two insert statements - Fts5Storage.pInsertContent and
18545 ** Fts5Storage.pInsertDocsize - if they have not already been prepared.
18546 ** Return SQLITE_OK if successful, or an SQLite error code if an error
18547 ** occurs.
18548 */
18549 static int fts5StorageGetStmt(
18550   Fts5Storage *p,                 /* Storage handle */
18551   int eStmt,                      /* FTS5_STMT_XXX constant */
18552   sqlite3_stmt **ppStmt,          /* OUT: Prepared statement handle */
18553   char **pzErrMsg                 /* OUT: Error message (if any) */
18554 ){
18555   int rc = SQLITE_OK;
18556 
18557   /* If there is no %_docsize table, there should be no requests for
18558   ** statements to operate on it.  */
18559   assert( p->pConfig->bColumnsize || (
18560         eStmt!=FTS5_STMT_REPLACE_DOCSIZE
18561      && eStmt!=FTS5_STMT_DELETE_DOCSIZE
18562      && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE
18563   ));
18564 
18565   assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) );
18566   if( p->aStmt[eStmt]==0 ){
18567     const char *azStmt[] = {
18568       "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC",
18569       "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC",
18570       "SELECT %s FROM %s T WHERE T.%Q=?",               /* LOOKUP  */
18571 
18572       "INSERT INTO %Q.'%q_content' VALUES(%s)",         /* INSERT_CONTENT  */
18573       "REPLACE INTO %Q.'%q_content' VALUES(%s)",        /* REPLACE_CONTENT */
18574       "DELETE FROM %Q.'%q_content' WHERE id=?",         /* DELETE_CONTENT  */
18575       "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",       /* REPLACE_DOCSIZE  */
18576       "DELETE FROM %Q.'%q_docsize' WHERE id=?",         /* DELETE_DOCSIZE  */
18577 
18578       "SELECT sz FROM %Q.'%q_docsize' WHERE id=?",      /* LOOKUP_DOCSIZE  */
18579 
18580       "REPLACE INTO %Q.'%q_config' VALUES(?,?)",        /* REPLACE_CONFIG */
18581       "SELECT %s FROM %s AS T",                         /* SCAN */
18582     };
18583     Fts5Config *pC = p->pConfig;
18584     char *zSql = 0;
18585 
18586     switch( eStmt ){
18587       case FTS5_STMT_SCAN:
18588         zSql = sqlite3_mprintf(azStmt[eStmt],
18589             pC->zContentExprlist, pC->zContent
18590         );
18591         break;
18592 
18593       case FTS5_STMT_SCAN_ASC:
18594       case FTS5_STMT_SCAN_DESC:
18595         zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist,
18596             pC->zContent, pC->zContentRowid, pC->zContentRowid,
18597             pC->zContentRowid
18598         );
18599         break;
18600 
18601       case FTS5_STMT_LOOKUP:
18602         zSql = sqlite3_mprintf(azStmt[eStmt],
18603             pC->zContentExprlist, pC->zContent, pC->zContentRowid
18604         );
18605         break;
18606 
18607       case FTS5_STMT_INSERT_CONTENT:
18608       case FTS5_STMT_REPLACE_CONTENT: {
18609         int nCol = pC->nCol + 1;
18610         char *zBind;
18611         int i;
18612 
18613         zBind = sqlite3_malloc64(1 + nCol*2);
18614         if( zBind ){
18615           for(i=0; i<nCol; i++){
18616             zBind[i*2] = '?';
18617             zBind[i*2 + 1] = ',';
18618           }
18619           zBind[i*2-1] = '\0';
18620           zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind);
18621           sqlite3_free(zBind);
18622         }
18623         break;
18624       }
18625 
18626       default:
18627         zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName);
18628         break;
18629     }
18630 
18631     if( zSql==0 ){
18632       rc = SQLITE_NOMEM;
18633     }else{
18634       int f = SQLITE_PREPARE_PERSISTENT;
18635       if( eStmt>FTS5_STMT_LOOKUP ) f |= SQLITE_PREPARE_NO_VTAB;
18636       p->pConfig->bLock++;
18637       rc = sqlite3_prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0);
18638       p->pConfig->bLock--;
18639       sqlite3_free(zSql);
18640       if( rc!=SQLITE_OK && pzErrMsg ){
18641         *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db));
18642       }
18643     }
18644   }
18645 
18646   *ppStmt = p->aStmt[eStmt];
18647   sqlite3_reset(*ppStmt);
18648   return rc;
18649 }
18650 
18651 
18652 static int fts5ExecPrintf(
18653   sqlite3 *db,
18654   char **pzErr,
18655   const char *zFormat,
18656   ...
18657 ){
18658   int rc;
18659   va_list ap;                     /* ... printf arguments */
18660   char *zSql;
18661 
18662   va_start(ap, zFormat);
18663   zSql = sqlite3_vmprintf(zFormat, ap);
18664 
18665   if( zSql==0 ){
18666     rc = SQLITE_NOMEM;
18667   }else{
18668     rc = sqlite3_exec(db, zSql, 0, 0, pzErr);
18669     sqlite3_free(zSql);
18670   }
18671 
18672   va_end(ap);
18673   return rc;
18674 }
18675 
18676 /*
18677 ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error
18678 ** code otherwise.
18679 */
18680 static int sqlite3Fts5DropAll(Fts5Config *pConfig){
18681   int rc = fts5ExecPrintf(pConfig->db, 0,
18682       "DROP TABLE IF EXISTS %Q.'%q_data';"
18683       "DROP TABLE IF EXISTS %Q.'%q_idx';"
18684       "DROP TABLE IF EXISTS %Q.'%q_config';",
18685       pConfig->zDb, pConfig->zName,
18686       pConfig->zDb, pConfig->zName,
18687       pConfig->zDb, pConfig->zName
18688   );
18689   if( rc==SQLITE_OK && pConfig->bColumnsize ){
18690     rc = fts5ExecPrintf(pConfig->db, 0,
18691         "DROP TABLE IF EXISTS %Q.'%q_docsize';",
18692         pConfig->zDb, pConfig->zName
18693     );
18694   }
18695   if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
18696     rc = fts5ExecPrintf(pConfig->db, 0,
18697         "DROP TABLE IF EXISTS %Q.'%q_content';",
18698         pConfig->zDb, pConfig->zName
18699     );
18700   }
18701   return rc;
18702 }
18703 
18704 static void fts5StorageRenameOne(
18705   Fts5Config *pConfig,            /* Current FTS5 configuration */
18706   int *pRc,                       /* IN/OUT: Error code */
18707   const char *zTail,              /* Tail of table name e.g. "data", "config" */
18708   const char *zName               /* New name of FTS5 table */
18709 ){
18710   if( *pRc==SQLITE_OK ){
18711     *pRc = fts5ExecPrintf(pConfig->db, 0,
18712         "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';",
18713         pConfig->zDb, pConfig->zName, zTail, zName, zTail
18714     );
18715   }
18716 }
18717 
18718 static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
18719   Fts5Config *pConfig = pStorage->pConfig;
18720   int rc = sqlite3Fts5StorageSync(pStorage);
18721 
18722   fts5StorageRenameOne(pConfig, &rc, "data", zName);
18723   fts5StorageRenameOne(pConfig, &rc, "idx", zName);
18724   fts5StorageRenameOne(pConfig, &rc, "config", zName);
18725   if( pConfig->bColumnsize ){
18726     fts5StorageRenameOne(pConfig, &rc, "docsize", zName);
18727   }
18728   if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
18729     fts5StorageRenameOne(pConfig, &rc, "content", zName);
18730   }
18731   return rc;
18732 }
18733 
18734 /*
18735 ** Create the shadow table named zPost, with definition zDefn. Return
18736 ** SQLITE_OK if successful, or an SQLite error code otherwise.
18737 */
18738 static int sqlite3Fts5CreateTable(
18739   Fts5Config *pConfig,            /* FTS5 configuration */
18740   const char *zPost,              /* Shadow table to create (e.g. "content") */
18741   const char *zDefn,              /* Columns etc. for shadow table */
18742   int bWithout,                   /* True for without rowid */
18743   char **pzErr                    /* OUT: Error message */
18744 ){
18745   int rc;
18746   char *zErr = 0;
18747 
18748   rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s",
18749       pConfig->zDb, pConfig->zName, zPost, zDefn,
18750 #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID
18751       bWithout?" WITHOUT ROWID":
18752 #endif
18753       ""
18754   );
18755   if( zErr ){
18756     *pzErr = sqlite3_mprintf(
18757         "fts5: error creating shadow table %q_%s: %s",
18758         pConfig->zName, zPost, zErr
18759     );
18760     sqlite3_free(zErr);
18761   }
18762 
18763   return rc;
18764 }
18765 
18766 /*
18767 ** Open a new Fts5Index handle. If the bCreate argument is true, create
18768 ** and initialize the underlying tables
18769 **
18770 ** If successful, set *pp to point to the new object and return SQLITE_OK.
18771 ** Otherwise, set *pp to NULL and return an SQLite error code.
18772 */
18773 static int sqlite3Fts5StorageOpen(
18774   Fts5Config *pConfig,
18775   Fts5Index *pIndex,
18776   int bCreate,
18777   Fts5Storage **pp,
18778   char **pzErr                    /* OUT: Error message */
18779 ){
18780   int rc = SQLITE_OK;
18781   Fts5Storage *p;                 /* New object */
18782   sqlite3_int64 nByte;            /* Bytes of space to allocate */
18783 
18784   nByte = sizeof(Fts5Storage)               /* Fts5Storage object */
18785         + pConfig->nCol * sizeof(i64);      /* Fts5Storage.aTotalSize[] */
18786   *pp = p = (Fts5Storage*)sqlite3_malloc64(nByte);
18787   if( !p ) return SQLITE_NOMEM;
18788 
18789   memset(p, 0, (size_t)nByte);
18790   p->aTotalSize = (i64*)&p[1];
18791   p->pConfig = pConfig;
18792   p->pIndex = pIndex;
18793 
18794   if( bCreate ){
18795     if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
18796       int nDefn = 32 + pConfig->nCol*10;
18797       char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 10);
18798       if( zDefn==0 ){
18799         rc = SQLITE_NOMEM;
18800       }else{
18801         int i;
18802         int iOff;
18803         sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY");
18804         iOff = (int)strlen(zDefn);
18805         for(i=0; i<pConfig->nCol; i++){
18806           sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i);
18807           iOff += (int)strlen(&zDefn[iOff]);
18808         }
18809         rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr);
18810       }
18811       sqlite3_free(zDefn);
18812     }
18813 
18814     if( rc==SQLITE_OK && pConfig->bColumnsize ){
18815       rc = sqlite3Fts5CreateTable(
18816           pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr
18817       );
18818     }
18819     if( rc==SQLITE_OK ){
18820       rc = sqlite3Fts5CreateTable(
18821           pConfig, "config", "k PRIMARY KEY, v", 1, pzErr
18822       );
18823     }
18824     if( rc==SQLITE_OK ){
18825       rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
18826     }
18827   }
18828 
18829   if( rc ){
18830     sqlite3Fts5StorageClose(p);
18831     *pp = 0;
18832   }
18833   return rc;
18834 }
18835 
18836 /*
18837 ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen().
18838 */
18839 static int sqlite3Fts5StorageClose(Fts5Storage *p){
18840   int rc = SQLITE_OK;
18841   if( p ){
18842     int i;
18843 
18844     /* Finalize all SQL statements */
18845     for(i=0; i<ArraySize(p->aStmt); i++){
18846       sqlite3_finalize(p->aStmt[i]);
18847     }
18848 
18849     sqlite3_free(p);
18850   }
18851   return rc;
18852 }
18853 
/*
** Context object passed to fts5StorageInsertCallback() while a column
** value is being tokenized.
*/
typedef struct Fts5InsertCtx Fts5InsertCtx;
struct Fts5InsertCtx {
  Fts5Storage *pStorage;          /* Storage whose index receives tokens */
  int iCol;                       /* Column value passed to the index write */
  int szCol;                      /* Size of column value in tokens */
};
18860 
18861 /*
18862 ** Tokenization callback used when inserting tokens into the FTS index.
18863 */
18864 static int fts5StorageInsertCallback(
18865   void *pContext,                 /* Pointer to Fts5InsertCtx object */
18866   int tflags,
18867   const char *pToken,             /* Buffer containing token */
18868   int nToken,                     /* Size of token in bytes */
18869   int iUnused1,                   /* Start offset of token */
18870   int iUnused2                    /* End offset of token */
18871 ){
18872   Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
18873   Fts5Index *pIdx = pCtx->pStorage->pIndex;
18874   UNUSED_PARAM2(iUnused1, iUnused2);
18875   if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
18876   if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
18877     pCtx->szCol++;
18878   }
18879   return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
18880 }
18881 
18882 /*
18883 ** If a row with rowid iDel is present in the %_content table, add the
18884 ** delete-markers to the FTS index necessary to delete it. Do not actually
18885 ** remove the %_content row at this time though.
18886 */
18887 static int fts5StorageDeleteFromIndex(
18888   Fts5Storage *p,
18889   i64 iDel,
18890   sqlite3_value **apVal
18891 ){
18892   Fts5Config *pConfig = p->pConfig;
18893   sqlite3_stmt *pSeek = 0;        /* SELECT to read row iDel from %_data */
18894   int rc;                         /* Return code */
18895   int rc2;                        /* sqlite3_reset() return code */
18896   int iCol;
18897   Fts5InsertCtx ctx;
18898 
18899   if( apVal==0 ){
18900     rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0);
18901     if( rc!=SQLITE_OK ) return rc;
18902     sqlite3_bind_int64(pSeek, 1, iDel);
18903     if( sqlite3_step(pSeek)!=SQLITE_ROW ){
18904       return sqlite3_reset(pSeek);
18905     }
18906   }
18907 
18908   ctx.pStorage = p;
18909   ctx.iCol = -1;
18910   rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel);
18911   for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
18912     if( pConfig->abUnindexed[iCol-1]==0 ){
18913       const char *zText;
18914       int nText;
18915       assert( pSeek==0 || apVal==0 );
18916       assert( pSeek!=0 || apVal!=0 );
18917       if( pSeek ){
18918         zText = (const char*)sqlite3_column_text(pSeek, iCol);
18919         nText = sqlite3_column_bytes(pSeek, iCol);
18920       }else if( ALWAYS(apVal) ){
18921         zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
18922         nText = sqlite3_value_bytes(apVal[iCol-1]);
18923       }else{
18924         continue;
18925       }
18926       ctx.szCol = 0;
18927       rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
18928           zText, nText, (void*)&ctx, fts5StorageInsertCallback
18929       );
18930       p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
18931       if( p->aTotalSize[iCol-1]<0 ){
18932         rc = FTS5_CORRUPT;
18933       }
18934     }
18935   }
18936   if( rc==SQLITE_OK && p->nTotalRow<1 ){
18937     rc = FTS5_CORRUPT;
18938   }else{
18939     p->nTotalRow--;
18940   }
18941 
18942   rc2 = sqlite3_reset(pSeek);
18943   if( rc==SQLITE_OK ) rc = rc2;
18944   return rc;
18945 }
18946 
18947 
18948 /*
18949 ** Insert a record into the %_docsize table. Specifically, do:
18950 **
18951 **   INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf);
18952 **
18953 ** If there is no %_docsize table (as happens if the columnsize=0 option
18954 ** is specified when the FTS5 table is created), this function is a no-op.
18955 */
18956 static int fts5StorageInsertDocsize(
18957   Fts5Storage *p,                 /* Storage module to write to */
18958   i64 iRowid,                     /* id value */
18959   Fts5Buffer *pBuf                /* sz value */
18960 ){
18961   int rc = SQLITE_OK;
18962   if( p->pConfig->bColumnsize ){
18963     sqlite3_stmt *pReplace = 0;
18964     rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
18965     if( rc==SQLITE_OK ){
18966       sqlite3_bind_int64(pReplace, 1, iRowid);
18967       sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC);
18968       sqlite3_step(pReplace);
18969       rc = sqlite3_reset(pReplace);
18970       sqlite3_bind_null(pReplace, 2);
18971     }
18972   }
18973   return rc;
18974 }
18975 
18976 /*
18977 ** Load the contents of the "averages" record from disk into the
18978 ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if
18979 ** argument bCache is true, set the p->bTotalsValid flag to indicate
18980 ** that the contents of aTotalSize[] and nTotalRow are valid until
18981 ** further notice.
18982 **
18983 ** Return SQLITE_OK if successful, or an SQLite error code if an error
18984 ** occurs.
18985 */
18986 static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){
18987   int rc = SQLITE_OK;
18988   if( p->bTotalsValid==0 ){
18989     rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize);
18990     p->bTotalsValid = bCache;
18991   }
18992   return rc;
18993 }
18994 
18995 /*
18996 ** Store the current contents of the p->nTotalRow and p->aTotalSize[]
18997 ** variables in the "averages" record on disk.
18998 **
18999 ** Return SQLITE_OK if successful, or an SQLite error code if an error
19000 ** occurs.
19001 */
19002 static int fts5StorageSaveTotals(Fts5Storage *p){
19003   int nCol = p->pConfig->nCol;
19004   int i;
19005   Fts5Buffer buf;
19006   int rc = SQLITE_OK;
19007   memset(&buf, 0, sizeof(buf));
19008 
19009   sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow);
19010   for(i=0; i<nCol; i++){
19011     sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]);
19012   }
19013   if( rc==SQLITE_OK ){
19014     rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n);
19015   }
19016   sqlite3_free(buf.p);
19017 
19018   return rc;
19019 }
19020 
19021 /*
19022 ** Remove a row from the FTS table.
19023 */
19024 static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){
19025   Fts5Config *pConfig = p->pConfig;
19026   int rc;
19027   sqlite3_stmt *pDel = 0;
19028 
19029   assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 );
19030   rc = fts5StorageLoadTotals(p, 1);
19031 
19032   /* Delete the index records */
19033   if( rc==SQLITE_OK ){
19034     rc = fts5StorageDeleteFromIndex(p, iDel, apVal);
19035   }
19036 
19037   /* Delete the %_docsize record */
19038   if( rc==SQLITE_OK && pConfig->bColumnsize ){
19039     rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0);
19040     if( rc==SQLITE_OK ){
19041       sqlite3_bind_int64(pDel, 1, iDel);
19042       sqlite3_step(pDel);
19043       rc = sqlite3_reset(pDel);
19044     }
19045   }
19046 
19047   /* Delete the %_content record */
19048   if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
19049     if( rc==SQLITE_OK ){
19050       rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0);
19051     }
19052     if( rc==SQLITE_OK ){
19053       sqlite3_bind_int64(pDel, 1, iDel);
19054       sqlite3_step(pDel);
19055       rc = sqlite3_reset(pDel);
19056     }
19057   }
19058 
19059   return rc;
19060 }
19061 
19062 /*
19063 ** Delete all entries in the FTS5 index.
19064 */
19065 static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){
19066   Fts5Config *pConfig = p->pConfig;
19067   int rc;
19068 
19069   p->bTotalsValid = 0;
19070 
19071   /* Delete the contents of the %_data and %_docsize tables. */
19072   rc = fts5ExecPrintf(pConfig->db, 0,
19073       "DELETE FROM %Q.'%q_data';"
19074       "DELETE FROM %Q.'%q_idx';",
19075       pConfig->zDb, pConfig->zName,
19076       pConfig->zDb, pConfig->zName
19077   );
19078   if( rc==SQLITE_OK && pConfig->bColumnsize ){
19079     rc = fts5ExecPrintf(pConfig->db, 0,
19080         "DELETE FROM %Q.'%q_docsize';",
19081         pConfig->zDb, pConfig->zName
19082     );
19083   }
19084 
19085   /* Reinitialize the %_data table. This call creates the initial structure
19086   ** and averages records.  */
19087   if( rc==SQLITE_OK ){
19088     rc = sqlite3Fts5IndexReinit(p->pIndex);
19089   }
19090   if( rc==SQLITE_OK ){
19091     rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION);
19092   }
19093   return rc;
19094 }
19095 
19096 static int sqlite3Fts5StorageRebuild(Fts5Storage *p){
19097   Fts5Buffer buf = {0,0,0};
19098   Fts5Config *pConfig = p->pConfig;
19099   sqlite3_stmt *pScan = 0;
19100   Fts5InsertCtx ctx;
19101   int rc, rc2;
19102 
19103   memset(&ctx, 0, sizeof(Fts5InsertCtx));
19104   ctx.pStorage = p;
19105   rc = sqlite3Fts5StorageDeleteAll(p);
19106   if( rc==SQLITE_OK ){
19107     rc = fts5StorageLoadTotals(p, 1);
19108   }
19109 
19110   if( rc==SQLITE_OK ){
19111     rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
19112   }
19113 
19114   while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){
19115     i64 iRowid = sqlite3_column_int64(pScan, 0);
19116 
19117     sqlite3Fts5BufferZero(&buf);
19118     rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
19119     for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
19120       ctx.szCol = 0;
19121       if( pConfig->abUnindexed[ctx.iCol]==0 ){
19122         const char *zText = (const char*)sqlite3_column_text(pScan, ctx.iCol+1);
19123         int nText = sqlite3_column_bytes(pScan, ctx.iCol+1);
19124         rc = sqlite3Fts5Tokenize(pConfig,
19125             FTS5_TOKENIZE_DOCUMENT,
19126             zText, nText,
19127             (void*)&ctx,
19128             fts5StorageInsertCallback
19129         );
19130       }
19131       sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
19132       p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
19133     }
19134     p->nTotalRow++;
19135 
19136     if( rc==SQLITE_OK ){
19137       rc = fts5StorageInsertDocsize(p, iRowid, &buf);
19138     }
19139   }
19140   sqlite3_free(buf.p);
19141   rc2 = sqlite3_reset(pScan);
19142   if( rc==SQLITE_OK ) rc = rc2;
19143 
19144   /* Write the averages record */
19145   if( rc==SQLITE_OK ){
19146     rc = fts5StorageSaveTotals(p);
19147   }
19148   return rc;
19149 }
19150 
19151 static int sqlite3Fts5StorageOptimize(Fts5Storage *p){
19152   return sqlite3Fts5IndexOptimize(p->pIndex);
19153 }
19154 
19155 static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){
19156   return sqlite3Fts5IndexMerge(p->pIndex, nMerge);
19157 }
19158 
19159 static int sqlite3Fts5StorageReset(Fts5Storage *p){
19160   return sqlite3Fts5IndexReset(p->pIndex);
19161 }
19162 
19163 /*
19164 ** Allocate a new rowid. This is used for "external content" tables when
19165 ** a NULL value is inserted into the rowid column. The new rowid is allocated
19166 ** by inserting a dummy row into the %_docsize table. The dummy will be
19167 ** overwritten later.
19168 **
19169 ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In
19170 ** this case the user is required to provide a rowid explicitly.
19171 */
19172 static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){
19173   int rc = SQLITE_MISMATCH;
19174   if( p->pConfig->bColumnsize ){
19175     sqlite3_stmt *pReplace = 0;
19176     rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0);
19177     if( rc==SQLITE_OK ){
19178       sqlite3_bind_null(pReplace, 1);
19179       sqlite3_bind_null(pReplace, 2);
19180       sqlite3_step(pReplace);
19181       rc = sqlite3_reset(pReplace);
19182     }
19183     if( rc==SQLITE_OK ){
19184       *piRowid = sqlite3_last_insert_rowid(p->pConfig->db);
19185     }
19186   }
19187   return rc;
19188 }
19189 
19190 /*
19191 ** Insert a new row into the FTS content table.
19192 */
19193 static int sqlite3Fts5StorageContentInsert(
19194   Fts5Storage *p,
19195   sqlite3_value **apVal,
19196   i64 *piRowid
19197 ){
19198   Fts5Config *pConfig = p->pConfig;
19199   int rc = SQLITE_OK;
19200 
19201   /* Insert the new row into the %_content table. */
19202   if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){
19203     if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){
19204       *piRowid = sqlite3_value_int64(apVal[1]);
19205     }else{
19206       rc = fts5StorageNewRowid(p, piRowid);
19207     }
19208   }else{
19209     sqlite3_stmt *pInsert = 0;    /* Statement to write %_content table */
19210     int i;                        /* Counter variable */
19211     rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
19212     for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
19213       rc = sqlite3_bind_value(pInsert, i, apVal[i]);
19214     }
19215     if( rc==SQLITE_OK ){
19216       sqlite3_step(pInsert);
19217       rc = sqlite3_reset(pInsert);
19218     }
19219     *piRowid = sqlite3_last_insert_rowid(pConfig->db);
19220   }
19221 
19222   return rc;
19223 }
19224 
19225 /*
19226 ** Insert new entries into the FTS index and %_docsize table.
19227 */
19228 static int sqlite3Fts5StorageIndexInsert(
19229   Fts5Storage *p,
19230   sqlite3_value **apVal,
19231   i64 iRowid
19232 ){
19233   Fts5Config *pConfig = p->pConfig;
19234   int rc = SQLITE_OK;             /* Return code */
19235   Fts5InsertCtx ctx;              /* Tokenization callback context object */
19236   Fts5Buffer buf;                 /* Buffer used to build up %_docsize blob */
19237 
19238   memset(&buf, 0, sizeof(Fts5Buffer));
19239   ctx.pStorage = p;
19240   rc = fts5StorageLoadTotals(p, 1);
19241 
19242   if( rc==SQLITE_OK ){
19243     rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid);
19244   }
19245   for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
19246     ctx.szCol = 0;
19247     if( pConfig->abUnindexed[ctx.iCol]==0 ){
19248       const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
19249       int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);
19250       rc = sqlite3Fts5Tokenize(pConfig,
19251           FTS5_TOKENIZE_DOCUMENT,
19252           zText, nText,
19253           (void*)&ctx,
19254           fts5StorageInsertCallback
19255       );
19256     }
19257     sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
19258     p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
19259   }
19260   p->nTotalRow++;
19261 
19262   /* Write the %_docsize record */
19263   if( rc==SQLITE_OK ){
19264     rc = fts5StorageInsertDocsize(p, iRowid, &buf);
19265   }
19266   sqlite3_free(buf.p);
19267 
19268   return rc;
19269 }
19270 
19271 static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){
19272   Fts5Config *pConfig = p->pConfig;
19273   char *zSql;
19274   int rc;
19275 
19276   zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'",
19277       pConfig->zDb, pConfig->zName, zSuffix
19278   );
19279   if( zSql==0 ){
19280     rc = SQLITE_NOMEM;
19281   }else{
19282     sqlite3_stmt *pCnt = 0;
19283     rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0);
19284     if( rc==SQLITE_OK ){
19285       if( SQLITE_ROW==sqlite3_step(pCnt) ){
19286         *pnRow = sqlite3_column_int64(pCnt, 0);
19287       }
19288       rc = sqlite3_finalize(pCnt);
19289     }
19290   }
19291 
19292   sqlite3_free(zSql);
19293   return rc;
19294 }
19295 
19296 /*
19297 ** Context object used by sqlite3Fts5StorageIntegrity().
19298 */
19299 typedef struct Fts5IntegrityCtx Fts5IntegrityCtx;
19300 struct Fts5IntegrityCtx {
19301   i64 iRowid;
19302   int iCol;
19303   int szCol;
19304   u64 cksum;
19305   Fts5Termset *pTermset;
19306   Fts5Config *pConfig;
19307 };
19308 
19309 
19310 /*
19311 ** Tokenization callback used by integrity check.
19312 */
19313 static int fts5StorageIntegrityCallback(
19314   void *pContext,                 /* Pointer to Fts5IntegrityCtx object */
19315   int tflags,
19316   const char *pToken,             /* Buffer containing token */
19317   int nToken,                     /* Size of token in bytes */
19318   int iUnused1,                   /* Start offset of token */
19319   int iUnused2                    /* End offset of token */
19320 ){
19321   Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
19322   Fts5Termset *pTermset = pCtx->pTermset;
19323   int bPresent;
19324   int ii;
19325   int rc = SQLITE_OK;
19326   int iPos;
19327   int iCol;
19328 
19329   UNUSED_PARAM2(iUnused1, iUnused2);
19330   if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE;
19331 
19332   if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){
19333     pCtx->szCol++;
19334   }
19335 
19336   switch( pCtx->pConfig->eDetail ){
19337     case FTS5_DETAIL_FULL:
19338       iPos = pCtx->szCol-1;
19339       iCol = pCtx->iCol;
19340       break;
19341 
19342     case FTS5_DETAIL_COLUMNS:
19343       iPos = pCtx->iCol;
19344       iCol = 0;
19345       break;
19346 
19347     default:
19348       assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE );
19349       iPos = 0;
19350       iCol = 0;
19351       break;
19352   }
19353 
19354   rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent);
19355   if( rc==SQLITE_OK && bPresent==0 ){
19356     pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
19357         pCtx->iRowid, iCol, iPos, 0, pToken, nToken
19358     );
19359   }
19360 
19361   for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){
19362     const int nChar = pCtx->pConfig->aPrefix[ii];
19363     int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar);
19364     if( nByte ){
19365       rc = sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent);
19366       if( bPresent==0 ){
19367         pCtx->cksum ^= sqlite3Fts5IndexEntryCksum(
19368             pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte
19369         );
19370       }
19371     }
19372   }
19373 
19374   return rc;
19375 }
19376 
19377 /*
19378 ** Check that the contents of the FTS index match that of the %_content
19379 ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return
19380 ** some other SQLite error code if an error occurs while attempting to
19381 ** determine this.
19382 */
19383 static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){
19384   Fts5Config *pConfig = p->pConfig;
19385   int rc = SQLITE_OK;             /* Return code */
19386   int *aColSize;                  /* Array of size pConfig->nCol */
19387   i64 *aTotalSize;                /* Array of size pConfig->nCol */
19388   Fts5IntegrityCtx ctx;
19389   sqlite3_stmt *pScan;
19390   int bUseCksum;
19391 
19392   memset(&ctx, 0, sizeof(Fts5IntegrityCtx));
19393   ctx.pConfig = p->pConfig;
19394   aTotalSize = (i64*)sqlite3_malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64)));
19395   if( !aTotalSize ) return SQLITE_NOMEM;
19396   aColSize = (int*)&aTotalSize[pConfig->nCol];
19397   memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol);
19398 
19399   bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL
19400            || (pConfig->eContent==FTS5_CONTENT_EXTERNAL && iArg)
19401   );
19402   if( bUseCksum ){
19403     /* Generate the expected index checksum based on the contents of the
19404     ** %_content table. This block stores the checksum in ctx.cksum. */
19405     rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0);
19406     if( rc==SQLITE_OK ){
19407       int rc2;
19408       while( SQLITE_ROW==sqlite3_step(pScan) ){
19409         int i;
19410         ctx.iRowid = sqlite3_column_int64(pScan, 0);
19411         ctx.szCol = 0;
19412         if( pConfig->bColumnsize ){
19413           rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize);
19414         }
19415         if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){
19416           rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
19417         }
19418         for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
19419           if( pConfig->abUnindexed[i] ) continue;
19420           ctx.iCol = i;
19421           ctx.szCol = 0;
19422           if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
19423             rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
19424           }
19425           if( rc==SQLITE_OK ){
19426             const char *zText = (const char*)sqlite3_column_text(pScan, i+1);
19427             int nText = sqlite3_column_bytes(pScan, i+1);
19428             rc = sqlite3Fts5Tokenize(pConfig,
19429                 FTS5_TOKENIZE_DOCUMENT,
19430                 zText, nText,
19431                 (void*)&ctx,
19432                 fts5StorageIntegrityCallback
19433             );
19434           }
19435           if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
19436             rc = FTS5_CORRUPT;
19437           }
19438           aTotalSize[i] += ctx.szCol;
19439           if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
19440             sqlite3Fts5TermsetFree(ctx.pTermset);
19441             ctx.pTermset = 0;
19442           }
19443         }
19444         sqlite3Fts5TermsetFree(ctx.pTermset);
19445         ctx.pTermset = 0;
19446 
19447         if( rc!=SQLITE_OK ) break;
19448       }
19449       rc2 = sqlite3_reset(pScan);
19450       if( rc==SQLITE_OK ) rc = rc2;
19451     }
19452 
19453     /* Test that the "totals" (sometimes called "averages") record looks Ok */
19454     if( rc==SQLITE_OK ){
19455       int i;
19456       rc = fts5StorageLoadTotals(p, 0);
19457       for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
19458         if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT;
19459       }
19460     }
19461 
19462     /* Check that the %_docsize and %_content tables contain the expected
19463     ** number of rows.  */
19464     if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){
19465       i64 nRow = 0;
19466       rc = fts5StorageCount(p, "content", &nRow);
19467       if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
19468     }
19469     if( rc==SQLITE_OK && pConfig->bColumnsize ){
19470       i64 nRow = 0;
19471       rc = fts5StorageCount(p, "docsize", &nRow);
19472       if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT;
19473     }
19474   }
19475 
19476   /* Pass the expected checksum down to the FTS index module. It will
19477   ** verify, amongst other things, that it matches the checksum generated by
19478   ** inspecting the index itself.  */
19479   if( rc==SQLITE_OK ){
19480     rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum);
19481   }
19482 
19483   sqlite3_free(aTotalSize);
19484   return rc;
19485 }
19486 
19487 /*
19488 ** Obtain an SQLite statement handle that may be used to read data from the
19489 ** %_content table.
19490 */
19491 static int sqlite3Fts5StorageStmt(
19492   Fts5Storage *p,
19493   int eStmt,
19494   sqlite3_stmt **pp,
19495   char **pzErrMsg
19496 ){
19497   int rc;
19498   assert( eStmt==FTS5_STMT_SCAN_ASC
19499        || eStmt==FTS5_STMT_SCAN_DESC
19500        || eStmt==FTS5_STMT_LOOKUP
19501   );
19502   rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg);
19503   if( rc==SQLITE_OK ){
19504     assert( p->aStmt[eStmt]==*pp );
19505     p->aStmt[eStmt] = 0;
19506   }
19507   return rc;
19508 }
19509 
19510 /*
19511 ** Release an SQLite statement handle obtained via an earlier call to
19512 ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function
19513 ** must match that passed to the sqlite3Fts5StorageStmt() call.
19514 */
19515 static void sqlite3Fts5StorageStmtRelease(
19516   Fts5Storage *p,
19517   int eStmt,
19518   sqlite3_stmt *pStmt
19519 ){
19520   assert( eStmt==FTS5_STMT_SCAN_ASC
19521        || eStmt==FTS5_STMT_SCAN_DESC
19522        || eStmt==FTS5_STMT_LOOKUP
19523   );
19524   if( p->aStmt[eStmt]==0 ){
19525     sqlite3_reset(pStmt);
19526     p->aStmt[eStmt] = pStmt;
19527   }else{
19528     sqlite3_finalize(pStmt);
19529   }
19530 }
19531 
19532 static int fts5StorageDecodeSizeArray(
19533   int *aCol, int nCol,            /* Array to populate */
19534   const u8 *aBlob, int nBlob      /* Record to read varints from */
19535 ){
19536   int i;
19537   int iOff = 0;
19538   for(i=0; i<nCol; i++){
19539     if( iOff>=nBlob ) return 1;
19540     iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]);
19541   }
19542   return (iOff!=nBlob);
19543 }
19544 
19545 /*
19546 ** Argument aCol points to an array of integers containing one entry for
19547 ** each table column. This function reads the %_docsize record for the
19548 ** specified rowid and populates aCol[] with the results.
19549 **
19550 ** An SQLite error code is returned if an error occurs, or SQLITE_OK
19551 ** otherwise.
19552 */
19553 static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){
19554   int nCol = p->pConfig->nCol;    /* Number of user columns in table */
19555   sqlite3_stmt *pLookup = 0;      /* Statement to query %_docsize */
19556   int rc;                         /* Return Code */
19557 
19558   assert( p->pConfig->bColumnsize );
19559   rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0);
19560   if( pLookup ){
19561     int bCorrupt = 1;
19562     assert( rc==SQLITE_OK );
19563     sqlite3_bind_int64(pLookup, 1, iRowid);
19564     if( SQLITE_ROW==sqlite3_step(pLookup) ){
19565       const u8 *aBlob = sqlite3_column_blob(pLookup, 0);
19566       int nBlob = sqlite3_column_bytes(pLookup, 0);
19567       if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){
19568         bCorrupt = 0;
19569       }
19570     }
19571     rc = sqlite3_reset(pLookup);
19572     if( bCorrupt && rc==SQLITE_OK ){
19573       rc = FTS5_CORRUPT;
19574     }
19575   }else{
19576     assert( rc!=SQLITE_OK );
19577   }
19578 
19579   return rc;
19580 }
19581 
19582 static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){
19583   int rc = fts5StorageLoadTotals(p, 0);
19584   if( rc==SQLITE_OK ){
19585     *pnToken = 0;
19586     if( iCol<0 ){
19587       int i;
19588       for(i=0; i<p->pConfig->nCol; i++){
19589         *pnToken += p->aTotalSize[i];
19590       }
19591     }else if( iCol<p->pConfig->nCol ){
19592       *pnToken = p->aTotalSize[iCol];
19593     }else{
19594       rc = SQLITE_RANGE;
19595     }
19596   }
19597   return rc;
19598 }
19599 
19600 static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){
19601   int rc = fts5StorageLoadTotals(p, 0);
19602   if( rc==SQLITE_OK ){
19603     /* nTotalRow being zero does not necessarily indicate a corrupt
19604     ** database - it might be that the FTS5 table really does contain zero
19605     ** rows. However this function is only called from the xRowCount() API,
19606     ** and there is no way for that API to be invoked if the table contains
19607     ** no rows. Hence the FTS5_CORRUPT return.  */
19608     *pnRow = p->nTotalRow;
19609     if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT;
19610   }
19611   return rc;
19612 }
19613 
19614 /*
19615 ** Flush any data currently held in-memory to disk.
19616 */
19617 static int sqlite3Fts5StorageSync(Fts5Storage *p){
19618   int rc = SQLITE_OK;
19619   i64 iLastRowid = sqlite3_last_insert_rowid(p->pConfig->db);
19620   if( p->bTotalsValid ){
19621     rc = fts5StorageSaveTotals(p);
19622     p->bTotalsValid = 0;
19623   }
19624   if( rc==SQLITE_OK ){
19625     rc = sqlite3Fts5IndexSync(p->pIndex);
19626   }
19627   sqlite3_set_last_insert_rowid(p->pConfig->db, iLastRowid);
19628   return rc;
19629 }
19630 
19631 static int sqlite3Fts5StorageRollback(Fts5Storage *p){
19632   p->bTotalsValid = 0;
19633   return sqlite3Fts5IndexRollback(p->pIndex);
19634 }
19635 
19636 static int sqlite3Fts5StorageConfigValue(
19637   Fts5Storage *p,
19638   const char *z,
19639   sqlite3_value *pVal,
19640   int iVal
19641 ){
19642   sqlite3_stmt *pReplace = 0;
19643   int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0);
19644   if( rc==SQLITE_OK ){
19645     sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC);
19646     if( pVal ){
19647       sqlite3_bind_value(pReplace, 2, pVal);
19648     }else{
19649       sqlite3_bind_int(pReplace, 2, iVal);
19650     }
19651     sqlite3_step(pReplace);
19652     rc = sqlite3_reset(pReplace);
19653     sqlite3_bind_null(pReplace, 1);
19654   }
19655   if( rc==SQLITE_OK && pVal ){
19656     int iNew = p->pConfig->iCookie + 1;
19657     rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew);
19658     if( rc==SQLITE_OK ){
19659       p->pConfig->iCookie = iNew;
19660     }
19661   }
19662   return rc;
19663 }
19664 
19665 #line 1 "fts5_tokenize.c"
19666 /*
19667 ** 2014 May 31
19668 **
19669 ** The author disclaims copyright to this source code.  In place of
19670 ** a legal notice, here is a blessing:
19671 **
19672 **    May you do good and not evil.
19673 **    May you find forgiveness for yourself and forgive others.
19674 **    May you share freely, never taking more than you give.
19675 **
19676 ******************************************************************************
19677 */
19678 
19679 
19680 /* #include "fts5Int.h" */
19681 
19682 /**************************************************************************
19683 ** Start of ascii tokenizer implementation.
19684 */
19685 
19686 /*
19687 ** For tokenizers with no "unicode" modifier, the set of token characters
19688 ** is the same as the set of ASCII range alphanumeric characters.
19689 */
19690 static unsigned char aAsciiTokenChar[128] = {
19691   0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x00..0x0F */
19692   0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x10..0x1F */
19693   0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,   /* 0x20..0x2F */
19694   1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 0, 0, 0, 0, 0, 0,   /* 0x30..0x3F */
19695   0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40..0x4F */
19696   1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x50..0x5F */
19697   0, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60..0x6F */
19698   1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 0, 0, 0, 0, 0,   /* 0x70..0x7F */
19699 };
19700 
/*
** An "ascii" tokenizer object. aTokenChar[] has one entry for each 7-bit
** character. A non-zero entry means that character is treated as part of
** a token; a zero entry means it is a separator.
*/
typedef struct AsciiTokenizer AsciiTokenizer;
struct AsciiTokenizer {
  unsigned char aTokenChar[128];  /* Non-zero for ASCII token characters */
};
19705 
19706 static void fts5AsciiAddExceptions(
19707   AsciiTokenizer *p,
19708   const char *zArg,
19709   int bTokenChars
19710 ){
19711   int i;
19712   for(i=0; zArg[i]; i++){
19713     if( (zArg[i] & 0x80)==0 ){
19714       p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars;
19715     }
19716   }
19717 }
19718 
19719 /*
19720 ** Delete a "ascii" tokenizer.
19721 */
19722 static void fts5AsciiDelete(Fts5Tokenizer *p){
19723   sqlite3_free(p);
19724 }
19725 
19726 /*
19727 ** Create an "ascii" tokenizer.
19728 */
19729 static int fts5AsciiCreate(
19730   void *pUnused,
19731   const char **azArg, int nArg,
19732   Fts5Tokenizer **ppOut
19733 ){
19734   int rc = SQLITE_OK;
19735   AsciiTokenizer *p = 0;
19736   UNUSED_PARAM(pUnused);
19737   if( nArg%2 ){
19738     rc = SQLITE_ERROR;
19739   }else{
19740     p = sqlite3_malloc(sizeof(AsciiTokenizer));
19741     if( p==0 ){
19742       rc = SQLITE_NOMEM;
19743     }else{
19744       int i;
19745       memset(p, 0, sizeof(AsciiTokenizer));
19746       memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar));
19747       for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
19748         const char *zArg = azArg[i+1];
19749         if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
19750           fts5AsciiAddExceptions(p, zArg, 1);
19751         }else
19752         if( 0==sqlite3_stricmp(azArg[i], "separators") ){
19753           fts5AsciiAddExceptions(p, zArg, 0);
19754         }else{
19755           rc = SQLITE_ERROR;
19756         }
19757       }
19758       if( rc!=SQLITE_OK ){
19759         fts5AsciiDelete((Fts5Tokenizer*)p);
19760         p = 0;
19761       }
19762     }
19763   }
19764 
19765   *ppOut = (Fts5Tokenizer*)p;
19766   return rc;
19767 }
19768 
19769 
/*
** Copy nByte bytes from aIn[] to aOut[], converting the ASCII upper case
** letters 'A'..'Z' to lower case. All other bytes are copied unchanged.
*/
static void asciiFold(char *aOut, const char *aIn, int nByte){
  int iByte = 0;
  while( iByte<nByte ){
    char ch = aIn[iByte];
    if( ch>='A' && ch<='Z' ){
      ch += ('a' - 'A');
    }
    aOut[iByte] = ch;
    iByte++;
  }
}
19778 
19779 /*
19780 ** Tokenize some text using the ascii tokenizer.
19781 */
19782 static int fts5AsciiTokenize(
19783   Fts5Tokenizer *pTokenizer,
19784   void *pCtx,
19785   int iUnused,
19786   const char *pText, int nText,
19787   int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
19788 ){
19789   AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
19790   int rc = SQLITE_OK;
19791   int ie;
19792   int is = 0;
19793 
19794   char aFold[64];
19795   int nFold = sizeof(aFold);
19796   char *pFold = aFold;
19797   unsigned char *a = p->aTokenChar;
19798 
19799   UNUSED_PARAM(iUnused);
19800 
19801   while( is<nText && rc==SQLITE_OK ){
19802     int nByte;
19803 
19804     /* Skip any leading divider characters. */
19805     while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){
19806       is++;
19807     }
19808     if( is==nText ) break;
19809 
19810     /* Count the token characters */
19811     ie = is+1;
19812     while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){
19813       ie++;
19814     }
19815 
19816     /* Fold to lower case */
19817     nByte = ie-is;
19818     if( nByte>nFold ){
19819       if( pFold!=aFold ) sqlite3_free(pFold);
19820       pFold = sqlite3_malloc64((sqlite3_int64)nByte*2);
19821       if( pFold==0 ){
19822         rc = SQLITE_NOMEM;
19823         break;
19824       }
19825       nFold = nByte*2;
19826     }
19827     asciiFold(pFold, &pText[is], nByte);
19828 
19829     /* Invoke the token callback */
19830     rc = xToken(pCtx, 0, pFold, nByte, is, ie);
19831     is = ie+1;
19832   }
19833 
19834   if( pFold!=aFold ) sqlite3_free(pFold);
19835   if( rc==SQLITE_DONE ) rc = SQLITE_OK;
19836   return rc;
19837 }
19838 
19839 /**************************************************************************
19840 ** Start of unicode61 tokenizer implementation.
19841 */
19842 
19843 
19844 /*
19845 ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
19846 ** from the sqlite3 source file utf.c. If this file is compiled as part
19847 ** of the amalgamation, they are not required.
19848 */
19849 #ifndef SQLITE_AMALGAMATION
19850 
19851 static const unsigned char sqlite3Utf8Trans1[] = {
19852   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
19853   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
19854   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
19855   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
19856   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
19857   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
19858   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
19859   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
19860 };
19861 
19862 #define READ_UTF8(zIn, zTerm, c)                           \
19863   c = *(zIn++);                                            \
19864   if( c>=0xc0 ){                                           \
19865     c = sqlite3Utf8Trans1[c-0xc0];                         \
19866     while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){            \
19867       c = (c<<6) + (0x3f & *(zIn++));                      \
19868     }                                                      \
19869     if( c<0x80                                             \
19870         || (c&0xFFFFF800)==0xD800                          \
19871         || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
19872   }
19873 
19874 
19875 #define WRITE_UTF8(zOut, c) {                          \
19876   if( c<0x00080 ){                                     \
19877     *zOut++ = (unsigned char)(c&0xFF);                 \
19878   }                                                    \
19879   else if( c<0x00800 ){                                \
19880     *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F);     \
19881     *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
19882   }                                                    \
19883   else if( c<0x10000 ){                                \
19884     *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F);    \
19885     *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
19886     *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
19887   }else{                                               \
19888     *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07);  \
19889     *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F);  \
19890     *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F);   \
19891     *zOut++ = 0x80 + (unsigned char)(c & 0x3F);        \
19892   }                                                    \
19893 }
19894 
19895 #endif /* ifndef SQLITE_AMALGAMATION */
19896 
/*
** A "unicode61" tokenizer object.
**
** Whether a non-ASCII code point is a token character is decided by
** looking up its category in aCategory[] and then inverting the answer if
** the code point appears in aiException[] (see fts5UnicodeIsAlnum()).
*/
typedef struct Unicode61Tokenizer Unicode61Tokenizer;
struct Unicode61Tokenizer {
  unsigned char aTokenChar[128];  /* ASCII range token characters */
  char *aFold;                    /* Buffer to fold text into */
  int nFold;                      /* Size of aFold[] in bytes */
  int eRemoveDiacritic;           /* An FTS5_REMOVE_DIACRITICS_* value */
  int nException;                 /* Number of entries in aiException[] */
  int *aiException;               /* Exception code points, sorted ascending */

  unsigned char aCategory[32];    /* True for token char categories */
};

/* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c) */
#define FTS5_REMOVE_DIACRITICS_NONE    0
#define FTS5_REMOVE_DIACRITICS_SIMPLE  1
#define FTS5_REMOVE_DIACRITICS_COMPLEX 2
19913 
/*
** Apply a "tokenchars" (bTokenChars==1) or "separators" (bTokenChars==0)
** option to tokenizer p. String z is the list of affected characters,
** encoded as UTF-8.
**
** Characters below 128 are recorded directly in p->aTokenChar[]. Any other
** character is added to the sorted p->aiException[] array, but only if its
** category-based classification differs from bTokenChars and it is not
** reported as a diacritic by sqlite3Fts5UnicodeIsdiacritic().
**
** Returns SQLITE_OK, or SQLITE_NOMEM if the exception array cannot be
** enlarged. In the latter case the tokenizer is left unmodified.
*/
static int fts5UnicodeAddExceptions(
  Unicode61Tokenizer *p,          /* Tokenizer object */
  const char *z,                  /* Characters to treat as exceptions */
  int bTokenChars                 /* 1 for 'tokenchars', 0 for 'separators' */
){
  int rc = SQLITE_OK;
  int n = (int)strlen(z);
  int *aNew;

  if( n>0 ){
    /* Every character occupies at least one byte of z, so at most n new
    ** entries can be added. */
    aNew = (int*)sqlite3_realloc64(p->aiException,
                                   (n+p->nException)*sizeof(int));
    if( aNew ){
      int nNew = p->nException;
      const unsigned char *zCsr = (const unsigned char*)z;
      const unsigned char *zTerm = (const unsigned char*)&z[n];
      while( zCsr<zTerm ){
        u32 iCode;
        int bToken;
        READ_UTF8(zCsr, zTerm, iCode);
        if( iCode<128 ){
          p->aTokenChar[iCode] = (unsigned char)bTokenChars;
        }else{
          bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)];
          assert( (bToken==0 || bToken==1) );
          assert( (bTokenChars==0 || bTokenChars==1) );
          if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){
            /* Find the first entry larger than iCode and insert in front
            ** of it, so that the array stays in ascending order. */
            int i;
            for(i=0; i<nNew; i++){
              if( (u32)aNew[i]>iCode ) break;
            }
            memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int));
            aNew[i] = iCode;
            nNew++;
          }
        }
      }
      p->aiException = aNew;
      p->nException = nNew;
    }else{
      rc = SQLITE_NOMEM;
    }
  }

  return rc;
}
19960 
19961 /*
19962 ** Return true if the p->aiException[] array contains the value iCode.
19963 */
19964 static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
19965   if( p->nException>0 ){
19966     int *a = p->aiException;
19967     int iLo = 0;
19968     int iHi = p->nException-1;
19969 
19970     while( iHi>=iLo ){
19971       int iTest = (iHi + iLo) / 2;
19972       if( iCode==a[iTest] ){
19973         return 1;
19974       }else if( iCode>a[iTest] ){
19975         iLo = iTest+1;
19976       }else{
19977         iHi = iTest-1;
19978       }
19979     }
19980   }
19981 
19982   return 0;
19983 }
19984 
19985 /*
19986 ** Delete a "unicode61" tokenizer.
19987 */
19988 static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
19989   if( pTok ){
19990     Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
19991     sqlite3_free(p->aiException);
19992     sqlite3_free(p->aFold);
19993     sqlite3_free(p);
19994   }
19995   return;
19996 }
19997 
19998 static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){
19999   const char *z = zCat;
20000 
20001   while( *z ){
20002     while( *z==' ' || *z=='\t' ) z++;
20003     if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){
20004       return SQLITE_ERROR;
20005     }
20006     while( *z!=' ' && *z!='\t' && *z!='\0' ) z++;
20007   }
20008 
20009   sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar);
20010   return SQLITE_OK;
20011 }
20012 
20013 /*
20014 ** Create a "unicode61" tokenizer.
20015 */
20016 static int fts5UnicodeCreate(
20017   void *pUnused,
20018   const char **azArg, int nArg,
20019   Fts5Tokenizer **ppOut
20020 ){
20021   int rc = SQLITE_OK;             /* Return code */
20022   Unicode61Tokenizer *p = 0;      /* New tokenizer object */
20023 
20024   UNUSED_PARAM(pUnused);
20025 
20026   if( nArg%2 ){
20027     rc = SQLITE_ERROR;
20028   }else{
20029     p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
20030     if( p ){
20031       const char *zCat = "L* N* Co";
20032       int i;
20033       memset(p, 0, sizeof(Unicode61Tokenizer));
20034 
20035       p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE;
20036       p->nFold = 64;
20037       p->aFold = sqlite3_malloc64(p->nFold * sizeof(char));
20038       if( p->aFold==0 ){
20039         rc = SQLITE_NOMEM;
20040       }
20041 
20042       /* Search for a "categories" argument */
20043       for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
20044         if( 0==sqlite3_stricmp(azArg[i], "categories") ){
20045           zCat = azArg[i+1];
20046         }
20047       }
20048 
20049       if( rc==SQLITE_OK ){
20050         rc = unicodeSetCategories(p, zCat);
20051       }
20052 
20053       for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
20054         const char *zArg = azArg[i+1];
20055         if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
20056           if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){
20057             rc = SQLITE_ERROR;
20058           }else{
20059             p->eRemoveDiacritic = (zArg[0] - '0');
20060             assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE
20061                  || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE
20062                  || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX
20063             );
20064           }
20065         }else
20066         if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
20067           rc = fts5UnicodeAddExceptions(p, zArg, 1);
20068         }else
20069         if( 0==sqlite3_stricmp(azArg[i], "separators") ){
20070           rc = fts5UnicodeAddExceptions(p, zArg, 0);
20071         }else
20072         if( 0==sqlite3_stricmp(azArg[i], "categories") ){
20073           /* no-op */
20074         }else{
20075           rc = SQLITE_ERROR;
20076         }
20077       }
20078 
20079     }else{
20080       rc = SQLITE_NOMEM;
20081     }
20082     if( rc!=SQLITE_OK ){
20083       fts5UnicodeDelete((Fts5Tokenizer*)p);
20084       p = 0;
20085     }
20086     *ppOut = (Fts5Tokenizer*)p;
20087   }
20088   return rc;
20089 }
20090 
20091 /*
20092 ** Return true if, for the purposes of tokenizing with the tokenizer
20093 ** passed as the first argument, codepoint iCode is considered a token
20094 ** character (not a separator).
20095 */
20096 static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
20097   return (
20098     p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)]
20099     ^ fts5UnicodeIsException(p, iCode)
20100   );
20101 }
20102 
/*
** Tokenize the nText bytes of text at pText using the unicode61 tokenizer
** pTokenizer. For each token found, xToken() is invoked as:
**
**     xToken(pCtx, 0, <folded token>, <size of folded token in bytes>, is, ie)
**
** where is and ie are the byte offsets within pText of the first byte of
** the token and of the byte immediately following it.
**
** ASCII-range bytes are classified using the p->aTokenChar[] lookup table
** and upper-case 'A'..'Z' are folded by adding 32. Codepoints outside the
** ASCII range are classified by fts5UnicodeIsAlnum() (and, once inside a
** token, are also accepted if sqlite3Fts5UnicodeIsdiacritic() is true) and
** are folded by sqlite3Fts5UnicodeFold().
**
** The folded token is accumulated in p->aFold, which is reallocated at
** twice its size whenever it comes close to full.
**
** Returns SQLITE_OK once the input is exhausted, SQLITE_NOMEM if growing
** the output buffer fails, or otherwise the first non-zero value returned
** by xToken() - except that SQLITE_DONE is converted to SQLITE_OK.
*/
static int fts5UnicodeTokenize(
  Fts5Tokenizer *pTokenizer,
  void *pCtx,
  int iUnused,
  const char *pText, int nText,
  int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
  int rc = SQLITE_OK;
  unsigned char *a = p->aTokenChar;

  unsigned char *zTerm = (unsigned char*)&pText[nText];
  unsigned char *zCsr = (unsigned char *)pText;

  /* Output buffer. pEnd is the highest value of the write pointer for which
  ** no reallocation is required before folding another character.  */
  char *aFold = p->aFold;
  int nFold = p->nFold;
  const char *pEnd = &aFold[nFold-6];

  UNUSED_PARAM(iUnused);

  /* Each iteration of this loop gobbles up a contiguous run of separators,
  ** then the next token.  */
  while( rc==SQLITE_OK ){
    u32 iCode;                    /* non-ASCII codepoint read from input */
    char *zOut = aFold;
    int is;
    int ie;

    /* Skip any separator characters. This loop is only ever left via a
    ** goto: either to tokenize_done at the end of the input, or into the
    ** body of the token loop below once the first token character has been
    ** found (with "is" set to the offset of that character).  */
    while( 1 ){
      if( zCsr>=zTerm ) goto tokenize_done;
      if( *zCsr & 0x80 ) {
        /* A character outside of the ascii range. Skip past it if it is
        ** a separator character. Or break out of the loop if it is not. */
        is = zCsr - (unsigned char*)pText;
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p, iCode) ){
          goto non_ascii_tokenchar;
        }
      }else{
        if( a[*zCsr] ){
          is = zCsr - (unsigned char*)pText;
          goto ascii_tokenchar;
        }
        zCsr++;
      }
    }

    /* Run through the tokenchars. Fold them into the output buffer along
    ** the way. The first character of each token arrives here via one of
    ** the gotos above, and so bypasses the buffer-size check at the top of
    ** the loop body; zOut is equal to aFold at that point.  */
    while( zCsr<zTerm ){

      /* Grow the output buffer so that there is sufficient space to fit the
      ** largest possible utf-8 character.  */
      if( zOut>pEnd ){
        aFold = sqlite3_malloc64((sqlite3_int64)nFold*2);
        if( aFold==0 ){
          rc = SQLITE_NOMEM;
          goto tokenize_done;
        }
        /* Re-base zOut on the new buffer before the old one is freed */
        zOut = &aFold[zOut - p->aFold];
        memcpy(aFold, p->aFold, nFold);
        sqlite3_free(p->aFold);
        p->aFold = aFold;
        p->nFold = nFold = nFold*2;
        pEnd = &aFold[nFold-6];
      }

      if( *zCsr & 0x80 ){
        /* A non-ascii-range character. Fold it into the output buffer if
        ** it is a token character, or break out of the loop if it is not. */
        READ_UTF8(zCsr, zTerm, iCode);
        if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){
 non_ascii_tokenchar:
          /* If the fold function returns 0 nothing is written to the output
          ** buffer, but the character is still part of the token.  */
          iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic);
          if( iCode ) WRITE_UTF8(zOut, iCode);
        }else{
          break;
        }
      }else if( a[*zCsr]==0 ){
        /* An ascii-range separator character. End of token. */
        break;
      }else{
 ascii_tokenchar:
        /* An ascii-range token character. Fold upper-case to lower-case. */
        if( *zCsr>='A' && *zCsr<='Z' ){
          *zOut++ = *zCsr + 32;
        }else{
          *zOut++ = *zCsr;
        }
        zCsr++;
      }
      /* Offset of the byte following the most recent token character */
      ie = zCsr - (unsigned char*)pText;
    }

    /* Invoke the token callback */
    rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
  }

 tokenize_done:
  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
  return rc;
}
20206 
20207 /**************************************************************************
20208 ** Start of porter stemmer implementation.
20209 */
20210 
20211 /* Any tokens larger than this (in bytes) are passed through without
20212 ** stemming. */
20213 #define FTS5_PORTER_MAX_TOKEN 64
20214 
/*
** A "porter" tokenizer instance. It wraps a parent tokenizer (module plus
** instance); fts5PorterTokenize() runs the parent tokenizer and stems each
** token it produces before passing it on.
*/
typedef struct PorterTokenizer PorterTokenizer;
struct PorterTokenizer {
  fts5_tokenizer tokenizer;       /* Parent tokenizer module */
  Fts5Tokenizer *pTokenizer;      /* Parent tokenizer instance */
  /* Scratch buffer in which fts5PorterCb() builds each stemmed token */
  char aBuf[FTS5_PORTER_MAX_TOKEN + 64];
};
20221 
20222 /*
20223 ** Delete a "porter" tokenizer.
20224 */
20225 static void fts5PorterDelete(Fts5Tokenizer *pTok){
20226   if( pTok ){
20227     PorterTokenizer *p = (PorterTokenizer*)pTok;
20228     if( p->pTokenizer ){
20229       p->tokenizer.xDelete(p->pTokenizer);
20230     }
20231     sqlite3_free(p);
20232   }
20233 }
20234 
20235 /*
20236 ** Create a "porter" tokenizer.
20237 */
20238 static int fts5PorterCreate(
20239   void *pCtx,
20240   const char **azArg, int nArg,
20241   Fts5Tokenizer **ppOut
20242 ){
20243   fts5_api *pApi = (fts5_api*)pCtx;
20244   int rc = SQLITE_OK;
20245   PorterTokenizer *pRet;
20246   void *pUserdata = 0;
20247   const char *zBase = "unicode61";
20248 
20249   if( nArg>0 ){
20250     zBase = azArg[0];
20251   }
20252 
20253   pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer));
20254   if( pRet ){
20255     memset(pRet, 0, sizeof(PorterTokenizer));
20256     rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer);
20257   }else{
20258     rc = SQLITE_NOMEM;
20259   }
20260   if( rc==SQLITE_OK ){
20261     int nArg2 = (nArg>0 ? nArg-1 : 0);
20262     const char **azArg2 = (nArg2 ? &azArg[1] : 0);
20263     rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer);
20264   }
20265 
20266   if( rc!=SQLITE_OK ){
20267     fts5PorterDelete((Fts5Tokenizer*)pRet);
20268     pRet = 0;
20269   }
20270   *ppOut = (Fts5Tokenizer*)pRet;
20271   return rc;
20272 }
20273 
/*
** Context object handed to fts5PorterCb() by fts5PorterTokenize(). It
** carries what the callback needs in order to forward each (possibly
** stemmed) token to the caller's own callback.
*/
typedef struct PorterContext PorterContext;
struct PorterContext {
  /* Context pointer to pass as the first argument to xToken() */
  void *pCtx;
  /* Token callback supplied by the caller of fts5PorterTokenize() */
  int (*xToken)(void*, int, const char*, int, int, int);
  /* Scratch buffer for stemming (the aBuf[] of the PorterTokenizer) */
  char *aBuf;
};
20280 
/*
** A single suffix-rewriting rule, as consumed by fts5PorterApply(). Within
** this part of the file that function is compiled out (#if 0).
*/
typedef struct PorterRule PorterRule;
struct PorterRule {
  /* Suffix to match. A NULL value terminates an array of rules. */
  const char *zSuffix;
  /* Size of zSuffix in bytes */
  int nSuffix;
  /* Condition on the stem left once the suffix is removed, or NULL if the
  ** rule applies unconditionally */
  int (*xCond)(char *zStem, int nStem);
  /* Text that replaces the suffix */
  const char *zOutput;
  /* Size of zOutput in bytes */
  int nOutput;
};
20289 
#if 0
/*
** This function is currently compiled out.
**
** Apply the first rule in array aRule[] (terminated by an entry with
** zSuffix==0) whose suffix matches the end of the *pnBuf byte term in
** aBuf. If that rule has no condition, or if its condition holds for the
** stem that remains once the suffix is removed, the suffix is replaced by
** the rule's output text, *pnBuf is updated and the index of the rule
** within aRule[] is returned. Otherwise, -1 is returned and the term is
** left unmodified. Rules following the first one with a matching suffix
** are never considered.
*/
static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){
  int ret = -1;
  int nBuf = *pnBuf;
  PorterRule *p;

  /* Find the first rule with a matching suffix */
  for(p=aRule; p->zSuffix; p++){
    assert( strlen(p->zSuffix)==p->nSuffix );
    assert( strlen(p->zOutput)==p->nOutput );
    if( nBuf<p->nSuffix ) continue;
    if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break;
  }

  /* p->zSuffix is NULL here if no rule matched */
  if( p->zSuffix ){
    int nStem = nBuf - p->nSuffix;
    if( p->xCond==0 || p->xCond(aBuf, nStem) ){
      memcpy(&aBuf[nStem], p->zOutput, p->nOutput);
      *pnBuf = nStem + p->nOutput;
      ret = p - aRule;
    }
  }

  return ret;
}
#endif
20315 
/*
** Return 1 if character c is one of the lower-case vowels 'a', 'e', 'i',
** 'o' or 'u', or if it is 'y' and bYIsVowel is non-zero. Return 0
** otherwise.
*/
static int fts5PorterIsVowel(char c, int bYIsVowel){
  switch( c ){
    case 'a': case 'e': case 'i': case 'o': case 'u':
      return 1;
    case 'y':
      return bYIsVowel ? 1 : 0;
    default:
      return 0;
  }
}
20321 
/*
** Scan the nStem byte buffer zStem for a vowel followed (not necessarily
** immediately) by a consonant. If such a pair is found, return the offset
** of the byte following the consonant. Otherwise, return 0.
**
** Parameter bPrevCons is true if the character preceding zStem[0] is to
** be treated as a consonant. This matters only for 'y', which counts as
** a vowel when it follows a consonant.
*/
static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){
  int iPos = 0;
  int bCons = bPrevCons;

  /* Skip forward to the first vowel */
  while( iPos<nStem ){
    bCons = !fts5PorterIsVowel(zStem[iPos], bCons);
    if( bCons==0 ) break;
    iPos++;
  }

  /* Starting with the character after that vowel, scan for a consonant */
  iPos++;
  while( iPos<nStem ){
    bCons = !fts5PorterIsVowel(zStem[iPos], bCons);
    if( bCons ) return iPos+1;
    iPos++;
  }
  return 0;
}
20337 
/* porter rule condition: (m > 0)
**
** True if the stem contains at least one vowel-consonant sequence. */
static int fts5Porter_MGt0(char *zStem, int nStem){
  return fts5PorterGobbleVC(zStem, nStem, 0)!=0;
}
20342 
/* porter rule condition: (m > 1)
**
** True if the stem contains at least two vowel-consonant sequences. */
static int fts5Porter_MGt1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)!=0;
}
20352 
/* porter rule condition: (m = 1)
**
** True if the stem contains exactly one vowel-consonant sequence. */
static int fts5Porter_MEq1(char *zStem, int nStem){
  int nFirst = fts5PorterGobbleVC(zStem, nStem, 0);
  if( nFirst==0 ) return 0;
  return fts5PorterGobbleVC(&zStem[nFirst], nStem-nFirst, 1)==0;
}
20362 
/* porter rule condition: (*o)
**
** Return true if the stem ends consonant-vowel-consonant and the final
** consonant is not 'w', 'x' or 'y'. Return false otherwise.
**
** The stem must be at least one byte in size. Each character is classified
** in order, as whether or not 'y' is a vowel depends on the character
** before it. Only the classifications of the final three characters
** matter, so the bitmask is truncated to three bits on each iteration.
** This keeps the left-shift well defined regardless of the stem size.
*/
static int fts5Porter_Ostar(char *zStem, int nStem){
  assert( nStem>0 );
  if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){
    return 0;
  }else{
    int i;
    int mask = 0;                 /* Bit set for each consonant, newest in LSB */
    int bCons = 0;
    for(i=0; i<nStem; i++){
      bCons = !fts5PorterIsVowel(zStem[i], bCons);
      assert( bCons==0 || bCons==1 );
      mask = ((mask << 1) + bCons) & 0x0007;
    }
    /* 0x05 == binary 101 == consonant, vowel, consonant */
    return (mask==0x0005);
  }
}
20379 
/* porter rule condition: (m > 1 and (*S or *T))
**
** True if the stem ends in 's' or 't' and contains at least two
** vowel-consonant sequences. The stem must not be empty. */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  char cLast;
  assert( nStem>0 );
  cLast = zStem[nStem-1];
  if( cLast!='s' && cLast!='t' ) return 0;
  return fts5Porter_MGt1(zStem, nStem);
}
20386 
/* porter rule condition: (*v*)
**
** True if the stem contains a vowel. A 'y' counts as a vowel anywhere
** other than in the first position of the stem. */
static int fts5Porter_Vowel(char *zStem, int nStem){
  int iPos = 0;
  while( iPos<nStem ){
    if( fts5PorterIsVowel(zStem[iPos], iPos>0) ) return 1;
    iPos++;
  }
  return 0;
}
20397 
20398 
20399 /**************************************************************************
20400 ***************************************************************************
20401 ** GENERATED CODE STARTS HERE (mkportersteps.tcl)
20402 */
20403 
/*
** Porter stemmer step 4. Buffer aBuf contains a term *pnBuf bytes in size.
** The rules are grouped by the second-to-last character of the suffix.
** Within a group, the first suffix that matches the end of the term is
** selected. If the stem remaining after that suffix is removed satisfies
** (m > 1) - or, for "ion", (m > 1 and (*S or *T)) - the suffix is removed
** by reducing *pnBuf. A suffix only matches if the term is larger than
** the suffix itself.
**
** Always returns 0.
*/
static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* A term smaller than two bytes has no second-to-last character. No rule
  ** below can match such a term, so return early instead of reading the
  ** byte that precedes the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'e':
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'l':
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'o':
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 's':
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 't':
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'z':
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

  }
  return ret;
}
20536 
20537 
/*
** Porter stemmer step 1b, second part. Buffer aBuf contains a term *pnBuf
** bytes in size. If the term is larger than two bytes and ends with "at",
** "bl" or "iz", append an 'e' to it ("ate", "ble", "ize"), increase *pnBuf
** by one and return 1. Otherwise, return 0 and leave the term unmodified.
**
** The buffer must have space for the extra byte.
*/
static int fts5PorterStep1B2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* A term smaller than two bytes has no second-to-last character. No rule
  ** below can match such a term, so return early instead of reading the
  ** byte that precedes the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ate", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'b':
      if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ble", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){
        memcpy(&aBuf[nBuf-2], "ize", 3);
        *pnBuf = nBuf - 2 + 3;
        ret = 1;
      }
      break;

  }
  return ret;
}
20570 
20571 
/*
** Porter stemmer step 2. Buffer aBuf contains a term *pnBuf bytes in size.
** The rules are grouped by the second-to-last character of the suffix.
** Within a group, the first suffix that matches the end of the term is
** selected. If the stem remaining after that suffix is removed satisfies
** (m > 0), the suffix is replaced with its (never longer) substitute and
** *pnBuf is updated. For example "ational" -> "ate" and "biliti" -> "ble".
** A suffix only matches if the term is larger than the suffix itself.
**
** Always returns 0.
*/
static int fts5PorterStep2(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* A term smaller than two bytes has no second-to-last character. No rule
  ** below can match such a term, so return early instead of reading the
  ** byte that precedes the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ate", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "tion", 4);
          *pnBuf = nBuf - 6 + 4;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ence", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ance", 4);
          *pnBuf = nBuf - 4 + 4;
        }
      }
      break;

    case 'e':
      if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ize", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'g':
      if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "log", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 'l':
      if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "ble", 3);
          *pnBuf = nBuf - 3 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "al", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ent", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          memcpy(&aBuf[nBuf-3], "e", 1);
          *pnBuf = nBuf - 3 + 1;
        }
      }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ous", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }
      break;

    case 'o':
      if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ize", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ate", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ate", 3);
          *pnBuf = nBuf - 4 + 3;
        }
      }
      break;

    case 's':
      if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ive", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ful", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){
        if( fts5Porter_MGt0(aBuf, nBuf-7) ){
          memcpy(&aBuf[nBuf-7], "ous", 3);
          *pnBuf = nBuf - 7 + 3;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ive", 3);
          *pnBuf = nBuf - 5 + 3;
        }
      }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){
        if( fts5Porter_MGt0(aBuf, nBuf-6) ){
          memcpy(&aBuf[nBuf-6], "ble", 3);
          *pnBuf = nBuf - 6 + 3;
        }
      }
      break;

  }
  return ret;
}
20717 
20718 
/*
** Porter stemmer step 3. Buffer aBuf contains a term *pnBuf bytes in size.
** The rules are grouped by the second-to-last character of the suffix.
** Within a group, the first suffix that matches the end of the term is
** selected. If the stem remaining after that suffix is removed satisfies
** (m > 0), the suffix is either replaced with a shorter one ("ical",
** "icate" and "iciti" -> "ic", "alize" -> "al") or removed altogether
** ("ness", "ful", "ative"), and *pnBuf is updated. A suffix only matches
** if the term is larger than the suffix itself.
**
** Always returns 0.
*/
static int fts5PorterStep3(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* A term smaller than two bytes has no second-to-last character. No rule
  ** below can match such a term, so return early instead of reading the
  ** byte that precedes the buffer. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          memcpy(&aBuf[nBuf-4], "ic", 2);
          *pnBuf = nBuf - 4 + 2;
        }
      }
      break;

    case 's':
      if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt0(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 't':
      if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "ic", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt0(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }
      break;

    case 'z':
      if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt0(aBuf, nBuf-5) ){
          memcpy(&aBuf[nBuf-5], "al", 2);
          *pnBuf = nBuf - 5 + 2;
        }
      }
      break;

  }
  return ret;
}
20783 
20784 
/*
** Porter stemmer step 1b. Buffer aBuf contains a term *pnBuf bytes in
** size, which must be at least 2.
**
**   * If the term ends in "eed" and is larger than 3 bytes, then the
**     trailing "d" is dropped if the stem preceding "eed" satisfies
**     (m > 0). Zero is returned either way.
**
**   * Otherwise, if the term ends in "ed" (and is larger than 2 bytes) or
**     "ing" (and is larger than 3 bytes), and the stem preceding that
**     suffix contains a vowel, the suffix is removed and 1 is returned.
**
** In all other cases the term is left as it is and 0 is returned.
*/
static int fts5PorterStep1B(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;
  const char cPenult = aBuf[nBuf-2];

  if( cPenult=='e' ){
    if( nBuf>3 && memcmp("eed", &aBuf[nBuf-3], 3)==0 ){
      if( fts5Porter_MGt0(aBuf, nBuf-3) ){
        memcpy(&aBuf[nBuf-3], "ee", 2);
        *pnBuf = nBuf - 3 + 2;
      }
      return 0;
    }
    if( nBuf>2 && memcmp("ed", &aBuf[nBuf-2], 2)==0
     && fts5Porter_Vowel(aBuf, nBuf-2)
    ){
      *pnBuf = nBuf - 2;
      return 1;
    }
    return 0;
  }

  if( cPenult=='n'
   && nBuf>3 && memcmp("ing", &aBuf[nBuf-3], 3)==0
   && fts5Porter_Vowel(aBuf, nBuf-3)
  ){
    *pnBuf = nBuf - 3;
    return 1;
  }

  return 0;
}
20816 
20817 /*
20818 ** GENERATED CODE ENDS HERE (mkportersteps.tcl)
20819 ***************************************************************************
20820 **************************************************************************/
20821 
/*
** Porter stemmer step 1a. Buffer aBuf contains a term *pnBuf bytes in
** size, which must be at least 2. The term is shortened by reducing
** *pnBuf as follows:
**
**   * a term that ends in "sses" (more than 4 bytes) or "ies" (more than
**     3 bytes) loses its final two bytes,
**   * any other term that ends in "es" loses its final byte,
**   * a term that ends in "ss" is not modified, and
**   * any other term that ends in "s" loses its final byte.
*/
static void fts5PorterStep1A(char *aBuf, int *pnBuf){
  const int nBuf = *pnBuf;
  char cPenult;

  if( aBuf[nBuf-1]!='s' ) return;
  cPenult = aBuf[nBuf-2];

  if( cPenult=='s' ){
    /* "...ss" - leave the term alone */
    return;
  }
  if( cPenult!='e' ){
    /* "...s" - strip the final 's' */
    *pnBuf = nBuf-1;
    return;
  }

  /* "...es" */
  if( (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s')
   || (nBuf>3 && aBuf[nBuf-3]=='i')
  ){
    *pnBuf = nBuf-2;
  }else{
    *pnBuf = nBuf-1;
  }
}
20839 
20840 static int fts5PorterCb(
20841   void *pCtx,
20842   int tflags,
20843   const char *pToken,
20844   int nToken,
20845   int iStart,
20846   int iEnd
20847 ){
20848   PorterContext *p = (PorterContext*)pCtx;
20849 
20850   char *aBuf;
20851   int nBuf;
20852 
20853   if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through;
20854   aBuf = p->aBuf;
20855   nBuf = nToken;
20856   memcpy(aBuf, pToken, nBuf);
20857 
20858   /* Step 1. */
20859   fts5PorterStep1A(aBuf, &nBuf);
20860   if( fts5PorterStep1B(aBuf, &nBuf) ){
20861     if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){
20862       char c = aBuf[nBuf-1];
20863       if( fts5PorterIsVowel(c, 0)==0
20864        && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2]
20865       ){
20866         nBuf--;
20867       }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){
20868         aBuf[nBuf++] = 'e';
20869       }
20870     }
20871   }
20872 
20873   /* Step 1C. */
20874   if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){
20875     aBuf[nBuf-1] = 'i';
20876   }
20877 
20878   /* Steps 2 through 4. */
20879   fts5PorterStep2(aBuf, &nBuf);
20880   fts5PorterStep3(aBuf, &nBuf);
20881   fts5PorterStep4(aBuf, &nBuf);
20882 
20883   /* Step 5a. */
20884   assert( nBuf>0 );
20885   if( aBuf[nBuf-1]=='e' ){
20886     if( fts5Porter_MGt1(aBuf, nBuf-1)
20887      || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1))
20888     ){
20889       nBuf--;
20890     }
20891   }
20892 
20893   /* Step 5b. */
20894   if( nBuf>1 && aBuf[nBuf-1]=='l'
20895    && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1)
20896   ){
20897     nBuf--;
20898   }
20899 
20900   return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
20901 
20902  pass_through:
20903   return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
20904 }
20905 
20906 /*
20907 ** Tokenize using the porter tokenizer.
20908 */
20909 static int fts5PorterTokenize(
20910   Fts5Tokenizer *pTokenizer,
20911   void *pCtx,
20912   int flags,
20913   const char *pText, int nText,
20914   int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
20915 ){
20916   PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
20917   PorterContext sCtx;
20918   sCtx.xToken = xToken;
20919   sCtx.pCtx = pCtx;
20920   sCtx.aBuf = p->aBuf;
20921   return p->tokenizer.xTokenize(
20922       p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb
20923   );
20924 }
20925 
20926 /**************************************************************************
20927 ** Start of trigram implementation.
20928 */
/*
** A trigram tokenizer instance. The only state is the case-folding flag,
** which fts5TriCreate() sets to 1 by default and clears if the tokenizer
** is created with "case_sensitive 1".
*/
typedef struct TrigramTokenizer TrigramTokenizer;
struct TrigramTokenizer {
  int bFold;                      /* True to fold to lower-case */
};
20933 
20934 /*
20935 ** Free a trigram tokenizer.
20936 */
20937 static void fts5TriDelete(Fts5Tokenizer *p){
20938   sqlite3_free(p);
20939 }
20940 
20941 /*
20942 ** Allocate a trigram tokenizer.
20943 */
20944 static int fts5TriCreate(
20945   void *pUnused,
20946   const char **azArg,
20947   int nArg,
20948   Fts5Tokenizer **ppOut
20949 ){
20950   int rc = SQLITE_OK;
20951   TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew));
20952   UNUSED_PARAM(pUnused);
20953   if( pNew==0 ){
20954     rc = SQLITE_NOMEM;
20955   }else{
20956     int i;
20957     pNew->bFold = 1;
20958     for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
20959       const char *zArg = azArg[i+1];
20960       if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){
20961         if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
20962           rc = SQLITE_ERROR;
20963         }else{
20964           pNew->bFold = (zArg[0]=='0');
20965         }
20966       }else{
20967         rc = SQLITE_ERROR;
20968       }
20969     }
20970     if( rc!=SQLITE_OK ){
20971       fts5TriDelete((Fts5Tokenizer*)pNew);
20972       pNew = 0;
20973     }
20974   }
20975   *ppOut = (Fts5Tokenizer*)pNew;
20976   return rc;
20977 }
20978 
20979 /*
20980 ** Trigram tokenizer tokenize routine.
20981 */
20982 static int fts5TriTokenize(
20983   Fts5Tokenizer *pTok,
20984   void *pCtx,
20985   int unusedFlags,
20986   const char *pText, int nText,
20987   int (*xToken)(void*, int, const char*, int, int, int)
20988 ){
20989   TrigramTokenizer *p = (TrigramTokenizer*)pTok;
20990   int rc = SQLITE_OK;
20991   char aBuf[32];
20992   const unsigned char *zIn = (const unsigned char*)pText;
20993   const unsigned char *zEof = &zIn[nText];
20994   u32 iCode;
20995 
20996   UNUSED_PARAM(unusedFlags);
20997   while( 1 ){
20998     char *zOut = aBuf;
20999     int iStart = zIn - (const unsigned char*)pText;
21000     const unsigned char *zNext;
21001 
21002     READ_UTF8(zIn, zEof, iCode);
21003     if( iCode==0 ) break;
21004     zNext = zIn;
21005     if( zIn<zEof ){
21006       if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0);
21007       WRITE_UTF8(zOut, iCode);
21008       READ_UTF8(zIn, zEof, iCode);
21009       if( iCode==0 ) break;
21010     }else{
21011       break;
21012     }
21013     if( zIn<zEof ){
21014       if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0);
21015       WRITE_UTF8(zOut, iCode);
21016       READ_UTF8(zIn, zEof, iCode);
21017       if( iCode==0 ) break;
21018       if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0);
21019       WRITE_UTF8(zOut, iCode);
21020     }else{
21021       break;
21022     }
21023     rc = xToken(pCtx, 0, aBuf, zOut-aBuf, iStart, iStart + zOut-aBuf);
21024     if( rc!=SQLITE_OK ) break;
21025     zIn = zNext;
21026   }
21027 
21028   return rc;
21029 }
21030 
21031 /*
21032 ** Argument xCreate is a pointer to a constructor function for a tokenizer.
21033 ** pTok is a tokenizer previously created using the same method. This function
21034 ** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB
21035 ** indicating the style of pattern matching that the tokenizer can support.
21036 ** In practice, this is:
21037 **
21038 **     "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB
21039 **     "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE
21040 **     all other tokenizers - FTS5_PATTERN_NONE
21041 */
21042 static int sqlite3Fts5TokenizerPattern(
21043     int (*xCreate)(void*, const char**, int, Fts5Tokenizer**),
21044     Fts5Tokenizer *pTok
21045 ){
21046   if( xCreate==fts5TriCreate ){
21047     TrigramTokenizer *p = (TrigramTokenizer*)pTok;
21048     return p->bFold ? FTS5_PATTERN_LIKE : FTS5_PATTERN_GLOB;
21049   }
21050   return FTS5_PATTERN_NONE;
21051 }
21052 
21053 /*
21054 ** Register all built-in tokenizers with FTS5.
21055 */
21056 static int sqlite3Fts5TokenizerInit(fts5_api *pApi){
21057   struct BuiltinTokenizer {
21058     const char *zName;
21059     fts5_tokenizer x;
21060   } aBuiltin[] = {
21061     { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
21062     { "ascii",     {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }},
21063     { "porter",    {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
21064     { "trigram",   {fts5TriCreate, fts5TriDelete, fts5TriTokenize}},
21065   };
21066 
21067   int rc = SQLITE_OK;             /* Return code */
21068   int i;                          /* To iterate through builtin functions */
21069 
21070   for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){
21071     rc = pApi->xCreateTokenizer(pApi,
21072         aBuiltin[i].zName,
21073         (void*)pApi,
21074         &aBuiltin[i].x,
21075         0
21076     );
21077   }
21078 
21079   return rc;
21080 }
21081 
21082 #line 1 "fts5_unicode2.c"
21083 /*
21084 ** 2012-05-25
21085 **
21086 ** The author disclaims copyright to this source code.  In place of
21087 ** a legal notice, here is a blessing:
21088 **
21089 **    May you do good and not evil.
21090 **    May you find forgiveness for yourself and forgive others.
21091 **    May you share freely, never taking more than you give.
21092 **
21093 ******************************************************************************
21094 */
21095 
21096 /*
21097 ** DO NOT EDIT THIS MACHINE GENERATED FILE.
21098 */
21099 
21100 
21101 #include <assert.h>
21102 
21103 
21104 
/*
** If the argument is a codepoint corresponding to a lowercase letter
** in the ASCII range with a diacritic added, return the codepoint
** of the ASCII letter only. For example, if passed 235 - "LATIN
** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
** E"). The results of passing a codepoint that corresponds to an
** uppercase letter are undefined.
**
** Each aDia[] entry describes a run of consecutive codepoints: the value
** shifted right by 3 is the first codepoint of the run and the low 3 bits
** are the number of further codepoints in it. aChar[] holds, at the same
** index, the replacement for every codepoint of that run (the low 7 bits
** of the entry; this is 0 for some runs). A codepoint that lies in no run,
** or any negative value, is returned unchanged.
**
** Runs whose aChar[] entry has the HIBIT flag set are only applied when
** bComplex is non-zero. If bComplex is zero, a codepoint matched by such
** a run is returned unchanged.
*/
static int fts5_remove_diacritic(int c, int bComplex){
  /* The tables are constant, so they are given static storage rather than
  ** being rebuilt on the stack each time this function is called. */
  static const unsigned short aDia[] = {
        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
     3456,  3696,  3712,  3728,  3744,  3766,  3832,  3896,
     3912,  3928,  3944,  3968,  4008,  4040,  4056,  4106,
     4138,  4170,  4202,  4234,  4266,  4296,  4312,  4344,
     4408,  4424,  4442,  4472,  4488,  4504,  6148,  6198,
     6264,  6280,  6360,  6429,  6505,  6529, 61448, 61468,
    61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704,
    61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914,
    61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218,
    62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554,
    62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766,
    62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118,
    63182, 63242, 63274, 63310, 63368, 63390,
  };
#define HIBIT ((unsigned char)0x80)
  static const unsigned char aChar[] = {
    '\0',      'a',       'c',       'e',       'i',       'n',
    'o',       'u',       'y',       'y',       'a',       'c',
    'd',       'e',       'e',       'g',       'h',       'i',
    'j',       'k',       'l',       'n',       'o',       'r',
    's',       't',       'u',       'u',       'w',       'y',
    'z',       'o',       'u',       'a',       'i',       'o',
    'u',       'u'|HIBIT, 'a'|HIBIT, 'g',       'k',       'o',
    'o'|HIBIT, 'j',       'g',       'n',       'a'|HIBIT, 'a',
    'e',       'i',       'o',       'r',       'u',       's',
    't',       'h',       'a',       'e',       'o'|HIBIT, 'o',
    'o'|HIBIT, 'y',       '\0',      '\0',      '\0',      '\0',
    '\0',      '\0',      '\0',      '\0',      'a',       'b',
    'c'|HIBIT, 'd',       'd',       'e'|HIBIT, 'e',       'e'|HIBIT,
    'f',       'g',       'h',       'h',       'i',       'i'|HIBIT,
    'k',       'l',       'l'|HIBIT, 'l',       'm',       'n',
    'o'|HIBIT, 'p',       'r',       'r'|HIBIT, 'r',       's',
    's'|HIBIT, 't',       'u',       'u'|HIBIT, 'v',       'w',
    'w',       'x',       'y',       'z',       'h',       't',
    'w',       'y',       'a',       'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT,
    'e',       'e'|HIBIT, 'e'|HIBIT, 'i',       'o',       'o'|HIBIT,
    'o'|HIBIT, 'o'|HIBIT, 'u',       'u'|HIBIT, 'u'|HIBIT, 'y',
  };

  unsigned int key;               /* Search key derived from c */
  int iRes = 0;                   /* Index of last aDia[] entry <= key */
  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
  int iLo = 0;

  /* A negative value is not a codepoint. Without this test the unsigned
  ** key below would wrap around, select the final table entry and cause
  ** an unrelated letter to be returned. */
  if( c<0 ) return c;

  /* Setting the low 3 bits makes the key compare greater than or equal to
  ** the aDia[] entry of any run that starts at codepoint c. */
  key = (((unsigned int)c)<<3) | 0x00000007;

  /* Binary search for the last entry that is less than or equal to key. */
  while( iHi>=iLo ){
    int iTest = (iHi + iLo) / 2;
    if( key >= aDia[iTest] ){
      iRes = iTest;
      iLo = iTest+1;
    }else{
      iHi = iTest-1;
    }
  }
  assert( key>=aDia[iRes] );

  /* "Complex" mappings are skipped unless the caller asked for them. */
  if( bComplex==0 && (aChar[iRes] & 0x80) ) return c;

  /* c is only replaced if it lies at or before the last codepoint of the
  ** run that was found. */
  return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F);
}
21174 
21175 
/*
** Return non-zero if codepoint c is a diacritical modifier character,
** or zero if it is not.
**
** Only codepoints in the range 768..817 can be reported as modifiers.
** Membership is recorded in two 32-bit masks, one bit per codepoint
** counting up from 768. The non-zero value returned is the matching
** mask bit itself, so it is not necessarily 1.
*/
static int sqlite3Fts5UnicodeIsdiacritic(int c){
  const unsigned int mLower = 0x08029FDF;   /* Codepoints 768..799 */
  const unsigned int mUpper = 0x000361F8;   /* Codepoints 800..817 */
  unsigned int iBit;                        /* Offset of c from 768 */

  if( c<768 || c>817 ) return 0;

  iBit = (unsigned int)(c - 768);
  if( iBit>=32 ){
    return mUpper & ((unsigned int)1 << (iBit - 32));
  }
  return mLower & ((unsigned int)1 << iBit);
}
21188 
21189 
/*
** Fold unicode codepoint c to lower case and, optionally, strip a
** diacritic from the result.
**
** If c is an upper case character that has a lower case equivalent, the
** codepoint of the lower case version is returned. Otherwise, a copy of
** c is returned.
**
** Parameter eRemoveDiacritic is only consulted for codepoints in the
** range 128..65535. If it is non-zero, the (possibly folded) codepoint is
** passed through fts5_remove_diacritic(), with the second argument of
** that function set to true only if eRemoveDiacritic is exactly 2.
**
** The results are undefined if the value passed to this function
** is less than zero.
*/
static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){
  /* Case folding rules for codepoints 128..65535, sorted by iCode. Each
  ** rule covers the nRange codepoints that begin at codepoint iCode.
  **
  ** Bit 0 of flags selects which codepoints of the range are upper case
  ** and need folding. If it is clear, all nRange codepoints are folded.
  ** If it is set, only every second codepoint is folded - those that
  ** have the same parity as iCode.
  **
  ** The other seven bits (flags>>1) are an index into aiOff[]. A
  ** codepoint C that is subject to a rule folds to
  ** ((C + aiOff[flags>>1]) & 0xFFFF).
  **
  ** The contents of this array are generated by parsing the
  ** CaseFolding.txt file distributed as part of the "Unicode Character
  ** Database". See http://www.unicode.org for details.
  */
  static const struct TableEntry {
    unsigned short iCode;
    unsigned char flags;
    unsigned char nRange;
  } aEntry[] = {
    {65, 14, 26},          {181, 64, 1},          {192, 14, 23},
    {216, 14, 7},          {256, 1, 48},          {306, 1, 6},
    {313, 1, 16},          {330, 1, 46},          {376, 116, 1},
    {377, 1, 6},           {383, 104, 1},         {385, 50, 1},
    {386, 1, 4},           {390, 44, 1},          {391, 0, 1},
    {393, 42, 2},          {395, 0, 1},           {398, 32, 1},
    {399, 38, 1},          {400, 40, 1},          {401, 0, 1},
    {403, 42, 1},          {404, 46, 1},          {406, 52, 1},
    {407, 48, 1},          {408, 0, 1},           {412, 52, 1},
    {413, 54, 1},          {415, 56, 1},          {416, 1, 6},
    {422, 60, 1},          {423, 0, 1},           {425, 60, 1},
    {428, 0, 1},           {430, 60, 1},          {431, 0, 1},
    {433, 58, 2},          {435, 1, 4},           {439, 62, 1},
    {440, 0, 1},           {444, 0, 1},           {452, 2, 1},
    {453, 0, 1},           {455, 2, 1},           {456, 0, 1},
    {458, 2, 1},           {459, 1, 18},          {478, 1, 18},
    {497, 2, 1},           {498, 1, 4},           {502, 122, 1},
    {503, 134, 1},         {504, 1, 40},          {544, 110, 1},
    {546, 1, 18},          {570, 70, 1},          {571, 0, 1},
    {573, 108, 1},         {574, 68, 1},          {577, 0, 1},
    {579, 106, 1},         {580, 28, 1},          {581, 30, 1},
    {582, 1, 10},          {837, 36, 1},          {880, 1, 4},
    {886, 0, 1},           {902, 18, 1},          {904, 16, 3},
    {908, 26, 1},          {910, 24, 2},          {913, 14, 17},
    {931, 14, 9},          {962, 0, 1},           {975, 4, 1},
    {976, 140, 1},         {977, 142, 1},         {981, 146, 1},
    {982, 144, 1},         {984, 1, 24},          {1008, 136, 1},
    {1009, 138, 1},        {1012, 130, 1},        {1013, 128, 1},
    {1015, 0, 1},          {1017, 152, 1},        {1018, 0, 1},
    {1021, 110, 3},        {1024, 34, 16},        {1040, 14, 32},
    {1120, 1, 34},         {1162, 1, 54},         {1216, 6, 1},
    {1217, 1, 14},         {1232, 1, 88},         {1329, 22, 38},
    {4256, 66, 38},        {4295, 66, 1},         {4301, 66, 1},
    {7680, 1, 150},        {7835, 132, 1},        {7838, 96, 1},
    {7840, 1, 96},         {7944, 150, 8},        {7960, 150, 6},
    {7976, 150, 8},        {7992, 150, 8},        {8008, 150, 6},
    {8025, 151, 8},        {8040, 150, 8},        {8072, 150, 8},
    {8088, 150, 8},        {8104, 150, 8},        {8120, 150, 2},
    {8122, 126, 2},        {8124, 148, 1},        {8126, 100, 1},
    {8136, 124, 4},        {8140, 148, 1},        {8152, 150, 2},
    {8154, 120, 2},        {8168, 150, 2},        {8170, 118, 2},
    {8172, 152, 1},        {8184, 112, 2},        {8186, 114, 2},
    {8188, 148, 1},        {8486, 98, 1},         {8490, 92, 1},
    {8491, 94, 1},         {8498, 12, 1},         {8544, 8, 16},
    {8579, 0, 1},          {9398, 10, 26},        {11264, 22, 47},
    {11360, 0, 1},         {11362, 88, 1},        {11363, 102, 1},
    {11364, 90, 1},        {11367, 1, 6},         {11373, 84, 1},
    {11374, 86, 1},        {11375, 80, 1},        {11376, 82, 1},
    {11378, 0, 1},         {11381, 0, 1},         {11390, 78, 2},
    {11392, 1, 100},       {11499, 1, 4},         {11506, 0, 1},
    {42560, 1, 46},        {42624, 1, 24},        {42786, 1, 14},
    {42802, 1, 62},        {42873, 1, 4},         {42877, 76, 1},
    {42878, 1, 10},        {42891, 0, 1},         {42893, 74, 1},
    {42896, 1, 4},         {42912, 1, 10},        {42922, 72, 1},
    {65313, 14, 26},
  };
  static const unsigned short aiOff[] = {
   1,     2,     8,     15,    16,    26,    28,    32,
   37,    38,    40,    48,    63,    64,    69,    71,
   79,    80,    116,   202,   203,   205,   206,   207,
   209,   210,   211,   213,   214,   217,   218,   219,
   775,   7264,  10792, 10795, 23228, 23256, 30204, 54721,
   54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
   57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
   65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
   65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
   65514, 65521, 65527, 65528, 65529,
  };

  int ret = c;                    /* Value to return */

  assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );

  /* Values below 128: only 'A'..'Z' are changed. */
  if( c<128 ){
    if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
    return ret;
  }

  /* Values of 65536 and above: the tables are not used. Only the 40
  ** codepoints starting at 66560 are changed. */
  if( c>=65536 ){
    if( c>=66560 && c<66600 ) ret = c + 40;
    return ret;
  }

  /* Values 128..65535: consult the folding rules. */
  {
    const struct TableEntry *pRule;   /* Last rule with iCode<=c */
    int iLo = 0;                      /* Rules before iLo have iCode<=c */
    int iHi = sizeof(aEntry)/sizeof(aEntry[0]);   /* Rules from iHi do not */

    assert( c>aEntry[0].iCode );
    while( iLo<iHi ){
      int iMid = iLo + (iHi - iLo)/2;
      if( (int)aEntry[iMid].iCode<=c ){
        iLo = iMid+1;
      }else{
        iHi = iMid;
      }
    }

    assert( iLo>0 && c>=aEntry[iLo-1].iCode );
    pRule = &aEntry[iLo-1];

    if( c<(pRule->iCode + pRule->nRange) ){
      /* A rule with bit 0 of flags set skips codepoints whose parity
      ** differs from that of iCode. */
      int bSkip = (pRule->flags & 0x01) && ((pRule->iCode ^ c) & 0x01);
      if( !bSkip ){
        ret = (c + (aiOff[pRule->flags>>1])) & 0x0000FFFF;
        assert( ret>0 );
      }
    }

    if( eRemoveDiacritic ){
      ret = fts5_remove_diacritic(ret, eRemoveDiacritic==2);
    }
  }

  return ret;
}
21333 
21334 
21335 static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){
21336   aArray[0] = 1;
21337   switch( zCat[0] ){
21338     case 'C':
21339           switch( zCat[1] ){
21340             case 'c': aArray[1] = 1; break;
21341             case 'f': aArray[2] = 1; break;
21342             case 'n': aArray[3] = 1; break;
21343             case 's': aArray[4] = 1; break;
21344             case 'o': aArray[31] = 1; break;
21345             case '*':
21346               aArray[1] = 1;
21347               aArray[2] = 1;
21348               aArray[3] = 1;
21349               aArray[4] = 1;
21350               aArray[31] = 1;
21351               break;
21352             default: return 1;          }
21353           break;
21354 
21355     case 'L':
21356           switch( zCat[1] ){
21357             case 'l': aArray[5] = 1; break;
21358             case 'm': aArray[6] = 1; break;
21359             case 'o': aArray[7] = 1; break;
21360             case 't': aArray[8] = 1; break;
21361             case 'u': aArray[9] = 1; break;
21362             case 'C': aArray[30] = 1; break;
21363             case '*':
21364               aArray[5] = 1;
21365               aArray[6] = 1;
21366               aArray[7] = 1;
21367               aArray[8] = 1;
21368               aArray[9] = 1;
21369               aArray[30] = 1;
21370               break;
21371             default: return 1;          }
21372           break;
21373 
21374     case 'M':
21375           switch( zCat[1] ){
21376             case 'c': aArray[10] = 1; break;
21377             case 'e': aArray[11] = 1; break;
21378             case 'n': aArray[12] = 1; break;
21379             case '*':
21380               aArray[10] = 1;
21381               aArray[11] = 1;
21382               aArray[12] = 1;
21383               break;
21384             default: return 1;          }
21385           break;
21386 
21387     case 'N':
21388           switch( zCat[1] ){
21389             case 'd': aArray[13] = 1; break;
21390             case 'l': aArray[14] = 1; break;
21391             case 'o': aArray[15] = 1; break;
21392             case '*':
21393               aArray[13] = 1;
21394               aArray[14] = 1;
21395               aArray[15] = 1;
21396               break;
21397             default: return 1;          }
21398           break;
21399 
21400     case 'P':
21401           switch( zCat[1] ){
21402             case 'c': aArray[16] = 1; break;
21403             case 'd': aArray[17] = 1; break;
21404             case 'e': aArray[18] = 1; break;
21405             case 'f': aArray[19] = 1; break;
21406             case 'i': aArray[20] = 1; break;
21407             case 'o': aArray[21] = 1; break;
21408             case 's': aArray[22] = 1; break;
21409             case '*':
21410               aArray[16] = 1;
21411               aArray[17] = 1;
21412               aArray[18] = 1;
21413               aArray[19] = 1;
21414               aArray[20] = 1;
21415               aArray[21] = 1;
21416               aArray[22] = 1;
21417               break;
21418             default: return 1;          }
21419           break;
21420 
21421     case 'S':
21422           switch( zCat[1] ){
21423             case 'c': aArray[23] = 1; break;
21424             case 'k': aArray[24] = 1; break;
21425             case 'm': aArray[25] = 1; break;
21426             case 'o': aArray[26] = 1; break;
21427             case '*':
21428               aArray[23] = 1;
21429               aArray[24] = 1;
21430               aArray[25] = 1;
21431               aArray[26] = 1;
21432               break;
21433             default: return 1;          }
21434           break;
21435 
21436     case 'Z':
21437           switch( zCat[1] ){
21438             case 'l': aArray[27] = 1; break;
21439             case 'p': aArray[28] = 1; break;
21440             case 's': aArray[29] = 1; break;
21441             case '*':
21442               aArray[27] = 1;
21443               aArray[28] = 1;
21444               aArray[29] = 1;
21445               break;
21446             default: return 1;          }
21447           break;
21448 
21449   }
21450   return 0;
21451 }
21452 
/*
** Offsets into aFts5UnicodeMap[]. The 17 values are non-decreasing. Each
** one is the index of an element of aFts5UnicodeMap[] at which the values
** stored in that array start again from a low value, and the final entry
** (1765) is the total number of elements in aFts5UnicodeMap[]. Consecutive
** pairs of entries therefore delimit 16 sub-ranges of that array, most of
** which are empty.
**
** NOTE(review): presumably sub-range i describes the codepoints of the
** i'th block of 65536 codepoints - confirm against the code that reads
** this table.
*/
static u16 aFts5UnicodeBlock[] = {
    0,     1471,  1753,  1760,  1760,  1760,  1760,  1760,  1760,  1760,
    1760,  1760,  1760,  1760,  1760,  1763,  1765,
  };
/*
** Table of 1765 16-bit values, stored as a series of runs. The values
** appear to be in ascending order within each run, and the index at which
** each run begins is recorded in aFts5UnicodeBlock[].
**
** NOTE(review): each value looks like the low 16 bits of the first
** codepoint of a range of codepoints, with the properties of the range
** held at the same index of aFts5UnicodeData[] - confirm against the code
** that reads these tables.
*/
static u16 aFts5UnicodeMap[] = {
    0,     32,    33,    36,    37,    40,    41,    42,    43,    44,
    45,    46,    48,    58,    60,    63,    65,    91,    92,    93,
    94,    95,    96,    97,    123,   124,   125,   126,   127,   160,
    161,   162,   166,   167,   168,   169,   170,   171,   172,   173,
    174,   175,   176,   177,   178,   180,   181,   182,   184,   185,
    186,   187,   188,   191,   192,   215,   216,   223,   247,   248,
    256,   312,   313,   329,   330,   377,   383,   385,   387,   388,
    391,   394,   396,   398,   402,   403,   405,   406,   409,   412,
    414,   415,   417,   418,   423,   427,   428,   431,   434,   436,
    437,   440,   442,   443,   444,   446,   448,   452,   453,   454,
    455,   456,   457,   458,   459,   460,   461,   477,   478,   496,
    497,   498,   499,   500,   503,   505,   506,   564,   570,   572,
    573,   575,   577,   580,   583,   584,   592,   660,   661,   688,
    706,   710,   722,   736,   741,   748,   749,   750,   751,   768,
    880,   884,   885,   886,   890,   891,   894,   900,   902,   903,
    904,   908,   910,   912,   913,   931,   940,   975,   977,   978,
    981,   984,   1008,  1012,  1014,  1015,  1018,  1020,  1021,  1072,
    1120,  1154,  1155,  1160,  1162,  1217,  1231,  1232,  1329,  1369,
    1370,  1377,  1417,  1418,  1423,  1425,  1470,  1471,  1472,  1473,
    1475,  1476,  1478,  1479,  1488,  1520,  1523,  1536,  1542,  1545,
    1547,  1548,  1550,  1552,  1563,  1566,  1568,  1600,  1601,  1611,
    1632,  1642,  1646,  1648,  1649,  1748,  1749,  1750,  1757,  1758,
    1759,  1765,  1767,  1769,  1770,  1774,  1776,  1786,  1789,  1791,
    1792,  1807,  1808,  1809,  1810,  1840,  1869,  1958,  1969,  1984,
    1994,  2027,  2036,  2038,  2039,  2042,  2048,  2070,  2074,  2075,
    2084,  2085,  2088,  2089,  2096,  2112,  2137,  2142,  2208,  2210,
    2276,  2304,  2307,  2308,  2362,  2363,  2364,  2365,  2366,  2369,
    2377,  2381,  2382,  2384,  2385,  2392,  2402,  2404,  2406,  2416,
    2417,  2418,  2425,  2433,  2434,  2437,  2447,  2451,  2474,  2482,
    2486,  2492,  2493,  2494,  2497,  2503,  2507,  2509,  2510,  2519,
    2524,  2527,  2530,  2534,  2544,  2546,  2548,  2554,  2555,  2561,
    2563,  2565,  2575,  2579,  2602,  2610,  2613,  2616,  2620,  2622,
    2625,  2631,  2635,  2641,  2649,  2654,  2662,  2672,  2674,  2677,
    2689,  2691,  2693,  2703,  2707,  2730,  2738,  2741,  2748,  2749,
    2750,  2753,  2759,  2761,  2763,  2765,  2768,  2784,  2786,  2790,
    2800,  2801,  2817,  2818,  2821,  2831,  2835,  2858,  2866,  2869,
    2876,  2877,  2878,  2879,  2880,  2881,  2887,  2891,  2893,  2902,
    2903,  2908,  2911,  2914,  2918,  2928,  2929,  2930,  2946,  2947,
    2949,  2958,  2962,  2969,  2972,  2974,  2979,  2984,  2990,  3006,
    3008,  3009,  3014,  3018,  3021,  3024,  3031,  3046,  3056,  3059,
    3065,  3066,  3073,  3077,  3086,  3090,  3114,  3125,  3133,  3134,
    3137,  3142,  3146,  3157,  3160,  3168,  3170,  3174,  3192,  3199,
    3202,  3205,  3214,  3218,  3242,  3253,  3260,  3261,  3262,  3263,
    3264,  3270,  3271,  3274,  3276,  3285,  3294,  3296,  3298,  3302,
    3313,  3330,  3333,  3342,  3346,  3389,  3390,  3393,  3398,  3402,
    3405,  3406,  3415,  3424,  3426,  3430,  3440,  3449,  3450,  3458,
    3461,  3482,  3507,  3517,  3520,  3530,  3535,  3538,  3542,  3544,
    3570,  3572,  3585,  3633,  3634,  3636,  3647,  3648,  3654,  3655,
    3663,  3664,  3674,  3713,  3716,  3719,  3722,  3725,  3732,  3737,
    3745,  3749,  3751,  3754,  3757,  3761,  3762,  3764,  3771,  3773,
    3776,  3782,  3784,  3792,  3804,  3840,  3841,  3844,  3859,  3860,
    3861,  3864,  3866,  3872,  3882,  3892,  3893,  3894,  3895,  3896,
    3897,  3898,  3899,  3900,  3901,  3902,  3904,  3913,  3953,  3967,
    3968,  3973,  3974,  3976,  3981,  3993,  4030,  4038,  4039,  4046,
    4048,  4053,  4057,  4096,  4139,  4141,  4145,  4146,  4152,  4153,
    4155,  4157,  4159,  4160,  4170,  4176,  4182,  4184,  4186,  4190,
    4193,  4194,  4197,  4199,  4206,  4209,  4213,  4226,  4227,  4229,
    4231,  4237,  4238,  4239,  4240,  4250,  4253,  4254,  4256,  4295,
    4301,  4304,  4347,  4348,  4349,  4682,  4688,  4696,  4698,  4704,
    4746,  4752,  4786,  4792,  4800,  4802,  4808,  4824,  4882,  4888,
    4957,  4960,  4969,  4992,  5008,  5024,  5120,  5121,  5741,  5743,
    5760,  5761,  5787,  5788,  5792,  5867,  5870,  5888,  5902,  5906,
    5920,  5938,  5941,  5952,  5970,  5984,  5998,  6002,  6016,  6068,
    6070,  6071,  6078,  6086,  6087,  6089,  6100,  6103,  6104,  6107,
    6108,  6109,  6112,  6128,  6144,  6150,  6151,  6155,  6158,  6160,
    6176,  6211,  6212,  6272,  6313,  6314,  6320,  6400,  6432,  6435,
    6439,  6441,  6448,  6450,  6451,  6457,  6464,  6468,  6470,  6480,
    6512,  6528,  6576,  6593,  6600,  6608,  6618,  6622,  6656,  6679,
    6681,  6686,  6688,  6741,  6742,  6743,  6744,  6752,  6753,  6754,
    6755,  6757,  6765,  6771,  6783,  6784,  6800,  6816,  6823,  6824,
    6912,  6916,  6917,  6964,  6965,  6966,  6971,  6972,  6973,  6978,
    6979,  6981,  6992,  7002,  7009,  7019,  7028,  7040,  7042,  7043,
    7073,  7074,  7078,  7080,  7082,  7083,  7084,  7086,  7088,  7098,
    7142,  7143,  7144,  7146,  7149,  7150,  7151,  7154,  7164,  7168,
    7204,  7212,  7220,  7222,  7227,  7232,  7245,  7248,  7258,  7288,
    7294,  7360,  7376,  7379,  7380,  7393,  7394,  7401,  7405,  7406,
    7410,  7412,  7413,  7424,  7468,  7531,  7544,  7545,  7579,  7616,
    7676,  7680,  7830,  7838,  7936,  7944,  7952,  7960,  7968,  7976,
    7984,  7992,  8000,  8008,  8016,  8025,  8027,  8029,  8031,  8033,
    8040,  8048,  8064,  8072,  8080,  8088,  8096,  8104,  8112,  8118,
    8120,  8124,  8125,  8126,  8127,  8130,  8134,  8136,  8140,  8141,
    8144,  8150,  8152,  8157,  8160,  8168,  8173,  8178,  8182,  8184,
    8188,  8189,  8192,  8203,  8208,  8214,  8216,  8217,  8218,  8219,
    8221,  8222,  8223,  8224,  8232,  8233,  8234,  8239,  8240,  8249,
    8250,  8251,  8255,  8257,  8260,  8261,  8262,  8263,  8274,  8275,
    8276,  8277,  8287,  8288,  8298,  8304,  8305,  8308,  8314,  8317,
    8318,  8319,  8320,  8330,  8333,  8334,  8336,  8352,  8400,  8413,
    8417,  8418,  8421,  8448,  8450,  8451,  8455,  8456,  8458,  8459,
    8462,  8464,  8467,  8468,  8469,  8470,  8472,  8473,  8478,  8484,
    8485,  8486,  8487,  8488,  8489,  8490,  8494,  8495,  8496,  8500,
    8501,  8505,  8506,  8508,  8510,  8512,  8517,  8519,  8522,  8523,
    8524,  8526,  8527,  8528,  8544,  8579,  8581,  8585,  8592,  8597,
    8602,  8604,  8608,  8609,  8611,  8612,  8614,  8615,  8622,  8623,
    8654,  8656,  8658,  8659,  8660,  8661,  8692,  8960,  8968,  8972,
    8992,  8994,  9001,  9002,  9003,  9084,  9085,  9115,  9140,  9180,
    9186,  9216,  9280,  9312,  9372,  9450,  9472,  9655,  9656,  9665,
    9666,  9720,  9728,  9839,  9840,  9985,  10088, 10089, 10090, 10091,
    10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101,
    10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217,
    10218, 10219, 10220, 10221, 10222, 10223, 10224, 10240, 10496, 10627,
    10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637,
    10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647,
    10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750,
    11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365,
    11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393,
    11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520,
    11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696,
    11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780,
    11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800,
    11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812,
    11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904,
    11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296,
    12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306,
    12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317,
    12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347,
    12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449,
    12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736,
    12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938,
    12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981,
    40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528,
    42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624,
    42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800,
    42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912,
    43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043,
    43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136,
    43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 43259, 43264,
    43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395,
    43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472,
    43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588,
    43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643,
    43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713,
    43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762,
    43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003,
    44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203,
    55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112,
    64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320,
    64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020,
    65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075,
    65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086,
    65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097,
    65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118,
    65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279,
    65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294,
    65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343,
    65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378,
    65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490,
    65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529,
    65532, 0,     13,    40,    60,    63,    80,    128,   256,   263,
    311,   320,   373,   377,   394,   400,   464,   509,   640,   672,
    768,   800,   816,   833,   834,   842,   896,   927,   928,   968,
    976,   977,   1024,  1064,  1104,  1184,  2048,  2056,  2058,  2103,
    2108,  2111,  2135,  2136,  2304,  2326,  2335,  2336,  2367,  2432,
    2494,  2560,  2561,  2565,  2572,  2576,  2581,  2585,  2616,  2623,
    2624,  2640,  2656,  2685,  2687,  2816,  2873,  2880,  2904,  2912,
    2936,  3072,  3680,  4096,  4097,  4098,  4099,  4152,  4167,  4178,
    4198,  4224,  4226,  4227,  4272,  4275,  4279,  4281,  4283,  4285,
    4286,  4304,  4336,  4352,  4355,  4391,  4396,  4397,  4406,  4416,
    4480,  4482,  4483,  4531,  4534,  4543,  4545,  4549,  4560,  5760,
    5803,  5804,  5805,  5806,  5808,  5814,  5815,  5824,  8192,  9216,
    9328,  12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248,
    53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637,
    53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298,
    54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441,
    54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541,
    54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662,
    54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922,
    54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062,
    55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178,
    55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961,
    60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003,
    61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028,
    61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099,
    61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744,
    61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368,
    62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971,
    63045, 63104, 63232, 0,     42710, 42752, 46900, 46912, 47133, 63488,
    1,     32,    256,   0,     65533,
  };
/*
** Per-range data for the Unicode category tables. As used by
** sqlite3Fts5UnicodeCategory(), entry i describes the range of codepoints
** that begins at aFts5UnicodeMap[i]:
**
**   * the low 5 bits (value & 0x1F) hold the category code of the range;
**   * the remaining upper bits (value >> 5) hold the number of codepoints
**     in the range.
**
** A category code of 30 is special-cased by sqlite3Fts5UnicodeCategory():
** codepoints in such a range are reported as category 9 or 5 depending on
** whether their offset from the start of the range is even or odd.
**
** sqlite3Fts5UnicodeAscii() walks the leading entries of this array in
** order to classify codepoints 0..127.
*/
static u16 aFts5UnicodeData[] = {
    1025,  61,    117,   55,    117,   54,    50,    53,    57,    53,
    49,    85,    333,   85,    121,   85,    841,   54,    53,    50,
    56,    48,    56,    837,   54,    57,    50,    57,    1057,  61,
    53,    151,   58,    53,    56,    58,    39,    52,    57,    34,
    58,    56,    58,    57,    79,    56,    37,    85,    56,    47,
    39,    51,    111,   53,    745,   57,    233,   773,   57,    261,
    1822,  37,    542,   37,    1534,  222,   69,    73,    37,    126,
    126,   73,    69,    137,   37,    73,    37,    105,   101,   73,
    37,    73,    37,    190,   158,   37,    126,   126,   73,    37,
    126,   94,    37,    39,    94,    69,    135,   41,    40,    37,
    41,    40,    37,    41,    40,    37,    542,   37,    606,   37,
    41,    40,    37,    126,   73,    37,    1886,  197,   73,    37,
    73,    69,    126,   105,   37,    286,   2181,  39,    869,   582,
    152,   390,   472,   166,   248,   38,    56,    38,    568,   3596,
    158,   38,    56,    94,    38,    101,   53,    88,    41,    53,
    105,   41,    73,    37,    553,   297,   1125,  94,    37,    105,
    101,   798,   133,   94,    57,    126,   94,    37,    1641,  1541,
    1118,  58,    172,   75,    1790,  478,   37,    2846,  1225,  38,
    213,   1253,  53,    49,    55,    1452,  49,    44,    53,    76,
    53,    76,    53,    44,    871,   103,   85,    162,   121,   85,
    55,    85,    90,    364,   53,    85,    1031,  38,    327,   684,
    333,   149,   71,    44,    3175,  53,    39,    236,   34,    58,
    204,   70,    76,    58,    140,   71,    333,   103,   90,    39,
    469,   34,    39,    44,    967,   876,   2855,  364,   39,    333,
    1063,  300,   70,    58,    117,   38,    711,   140,   38,    300,
    38,    108,   38,    172,   501,   807,   108,   53,    39,    359,
    876,   108,   42,    1735,  44,    42,    44,    39,    106,   268,
    138,   44,    74,    39,    236,   327,   76,    85,    333,   53,
    38,    199,   231,   44,    74,    263,   71,    711,   231,   39,
    135,   44,    39,    106,   140,   74,    74,    44,    39,    42,
    71,    103,   76,    333,   71,    87,    207,   58,    55,    76,
    42,    199,   71,    711,   231,   71,    71,    71,    44,    106,
    76,    76,    108,   44,    135,   39,    333,   76,    103,   44,
    76,    42,    295,   103,   711,   231,   71,    167,   44,    39,
    106,   172,   76,    42,    74,    44,    39,    71,    76,    333,
    53,    55,    44,    74,    263,   71,    711,   231,   71,    167,
    44,    39,    42,    44,    42,    140,   74,    74,    44,    44,
    42,    71,    103,   76,    333,   58,    39,    207,   44,    39,
    199,   103,   135,   71,    39,    71,    71,    103,   391,   74,
    44,    74,    106,   106,   44,    39,    42,    333,   111,   218,
    55,    58,    106,   263,   103,   743,   327,   167,   39,    108,
    138,   108,   140,   76,    71,    71,    76,    333,   239,   58,
    74,    263,   103,   743,   327,   167,   44,    39,    42,    44,
    170,   44,    74,    74,    76,    74,    39,    71,    76,    333,
    71,    74,    263,   103,   1319,  39,    106,   140,   106,   106,
    44,    39,    42,    71,    76,    333,   207,   58,    199,   74,
    583,   775,   295,   39,    231,   44,    106,   108,   44,    266,
    74,    53,    1543,  44,    71,    236,   55,    199,   38,    268,
    53,    333,   85,    71,    39,    71,    39,    39,    135,   231,
    103,   39,    39,    71,    135,   44,    71,    204,   76,    39,
    167,   38,    204,   333,   135,   39,    122,   501,   58,    53,
    122,   76,    218,   333,   335,   58,    44,    58,    44,    58,
    44,    54,    50,    54,    50,    74,    263,   1159,  460,   42,
    172,   53,    76,    167,   364,   1164,  282,   44,    218,   90,
    181,   154,   85,    1383,  74,    140,   42,    204,   42,    76,
    74,    76,    39,    333,   213,   199,   74,    76,    135,   108,
    39,    106,   71,    234,   103,   140,   423,   44,    74,    76,
    202,   44,    39,    42,    333,   106,   44,    90,    1225,  41,
    41,    1383,  53,    38,    10631, 135,   231,   39,    135,   1319,
    135,   1063,  135,   231,   39,    135,   487,   1831,  135,   2151,
    108,   309,   655,   519,   346,   2727,  49,    19847, 85,    551,
    61,    839,   54,    50,    2407,  117,   110,   423,   135,   108,
    583,   108,   85,    583,   76,    423,   103,   76,    1671,  76,
    42,    236,   266,   44,    74,    364,   117,   38,    117,   55,
    39,    44,    333,   335,   213,   49,    149,   108,   61,    333,
    1127,  38,    1671,  1319,  44,    39,    2247,  935,   108,   138,
    76,    106,   74,    44,    202,   108,   58,    85,    333,   967,
    167,   1415,  554,   231,   74,    333,   47,    1114,  743,   76,
    106,   85,    1703,  42,    44,    42,    236,   44,    42,    44,
    74,    268,   202,   332,   44,    333,   333,   245,   38,    213,
    140,   42,    1511,  44,    42,    172,   42,    44,    170,   44,
    74,    231,   333,   245,   346,   300,   314,   76,    42,    967,
    42,    140,   74,    76,    42,    44,    74,    71,    333,   1415,
    44,    42,    76,    106,   44,    42,    108,   74,    149,   1159,
    266,   268,   74,    76,    181,   333,   103,   333,   967,   198,
    85,    277,   108,   53,    428,   42,    236,   135,   44,    135,
    74,    44,    71,    1413,  2022,  421,   38,    1093,  1190,  1260,
    140,   4830,  261,   3166,  261,   265,   197,   201,   261,   265,
    261,   265,   197,   201,   261,   41,    41,    41,    94,    229,
    265,   453,   261,   264,   261,   264,   261,   264,   165,   69,
    137,   40,    56,    37,    120,   101,   69,    137,   40,    120,
    133,   69,    137,   120,   261,   169,   120,   101,   69,    137,
    40,    88,    381,   162,   209,   85,    52,    51,    54,    84,
    51,    54,    52,    277,   59,    60,    162,   61,    309,   52,
    51,    149,   80,    117,   57,    54,    50,    373,   57,    53,
    48,    341,   61,    162,   194,   47,    38,    207,   121,   54,
    50,    38,    335,   121,   54,    50,    422,   855,   428,   139,
    44,    107,   396,   90,    41,    154,   41,    90,    37,    105,
    69,    105,   37,    58,    41,    90,    57,    169,   218,   41,
    58,    41,    58,    41,    58,    137,   58,    37,    137,   37,
    135,   37,    90,    69,    73,    185,   94,    101,   58,    57,
    90,    37,    58,    527,   1134,  94,    142,   47,    185,   186,
    89,    154,   57,    90,    57,    90,    57,    250,   57,    1018,
    89,    90,    57,    58,    57,    1018,  8601,  282,   153,   666,
    89,    250,   54,    50,    2618,  57,    986,   825,   1306,  217,
    602,   1274,  378,   1935,  2522,  719,   5882,  57,    314,   57,
    1754,  281,   3578,  57,    4634,  3322,  54,    50,    54,    50,
    54,    50,    54,    50,    54,    50,    54,    50,    54,    50,
    975,   1434,  185,   54,    50,    1017,  54,    50,    54,    50,
    54,    50,    54,    50,    54,    50,    537,   8218,  4217,  54,
    50,    54,    50,    54,    50,    54,    50,    54,    50,    54,
    50,    54,    50,    54,    50,    54,    50,    54,    50,    54,
    50,    2041,  54,    50,    54,    50,    1049,  54,    50,    8281,
    1562,  697,   90,    217,   346,   1513,  1509,  126,   73,    69,
    254,   105,   37,    94,    37,    94,    165,   70,    105,   37,
    3166,  37,    218,   158,   108,   94,    149,   47,    85,    1221,
    37,    37,    1799,  38,    53,    44,    743,   231,   231,   231,
    231,   231,   231,   231,   231,   1036,  85,    52,    51,    52,
    51,    117,   52,    51,    53,    52,    51,    309,   49,    85,
    49,    53,    52,    51,    85,    52,    51,    54,    50,    54,
    50,    54,    50,    54,    50,    181,   38,    341,   81,    858,
    2874,  6874,  410,   61,    117,   58,    38,    39,    46,    54,
    50,    54,    50,    54,    50,    54,    50,    54,    50,    90,
    54,    50,    54,    50,    54,    50,    54,    50,    49,    54,
    82,    58,    302,   140,   74,    49,    166,   90,    110,   38,
    39,    53,    90,    2759,  76,    88,    70,    39,    49,    2887,
    53,    102,   39,    1319,  3015,  90,    143,   346,   871,   1178,
    519,   1018,  335,   986,   271,   58,    495,   1050,  335,   1274,
    495,   2042,  8218,  39,    39,    2074,  39,    39,    679,   38,
    36583, 1786,  1287,  198,   85,    8583,  38,    117,   519,   333,
    71,    1502,  39,    44,    107,   53,    332,   53,    38,    798,
    44,    2247,  334,   76,    213,   760,   294,   88,    478,   69,
    2014,  38,    261,   190,   350,   38,    88,    158,   158,   382,
    70,    37,    231,   44,    103,   44,    135,   44,    743,   74,
    76,    42,    154,   207,   90,    55,    58,    1671,  149,   74,
    1607,  522,   44,    85,    333,   588,   199,   117,   39,    333,
    903,   268,   85,    743,   364,   74,    53,    935,   108,   42,
    1511,  44,    74,    140,   74,    44,    138,   437,   38,    333,
    85,    1319,  204,   74,    76,    74,    76,    103,   44,    263,
    44,    42,    333,   149,   519,   38,    199,   122,   39,    42,
    1543,  44,    39,    108,   71,    76,    167,   76,    39,    44,
    39,    71,    38,    85,    359,   42,    76,    74,    85,    39,
    70,    42,    44,    199,   199,   199,   231,   231,   1127,  74,
    44,    74,    44,    74,    53,    42,    44,    333,   39,    39,
    743,   1575,  36,    68,    68,    36,    63,    63,    11719, 3399,
    229,   165,   39,    44,    327,   57,    423,   167,   39,    71,
    71,    3463,  536,   11623, 54,    50,    2055,  1735,  391,   55,
    58,    524,   245,   54,    50,    53,    236,   53,    81,    80,
    54,    50,    54,    50,    54,    50,    54,    50,    54,    50,
    54,    50,    54,    50,    54,    50,    85,    54,    50,    149,
    112,   117,   149,   49,    54,    50,    54,    50,    54,    50,
    117,   57,    49,    121,   53,    55,    85,    167,   4327,  34,
    117,   55,    117,   54,    50,    53,    57,    53,    49,    85,
    333,   85,    121,   85,    841,   54,    53,    50,    56,    48,
    56,    837,   54,    57,    50,    57,    54,    50,    53,    54,
    50,    85,    327,   38,    1447,  70,    999,   199,   199,   199,
    103,   87,    57,    56,    58,    87,    58,    153,   90,    98,
    90,    391,   839,   615,   71,    487,   455,   3943,  117,   1455,
    314,   1710,  143,   570,   47,    410,   1466,  44,    935,   1575,
    999,   143,   551,   46,    263,   46,    967,   53,    1159,  263,
    53,    174,   1289,  1285,  2503,  333,   199,   39,    1415,  71,
    39,    743,   53,    271,   711,   207,   53,    839,   53,    1799,
    71,    39,    108,   76,    140,   135,   103,   871,   108,   44,
    271,   309,   935,   79,    53,    1735,  245,   711,   271,   615,
    271,   2343,  1007,  42,    44,    42,    1703,  492,   245,   655,
    333,   76,    42,    1447,  106,   140,   74,    76,    85,    34,
    149,   807,   333,   108,   1159,  172,   42,    268,   333,   149,
    76,    42,    1543,  106,   300,   74,    135,   149,   333,   1383,
    44,    42,    44,    74,    204,   42,    44,    333,   28135, 3182,
    149,   34279, 18215, 2215,  39,    1482,  140,   422,   71,    7898,
    1274,  1946,  74,    108,   122,   202,   258,   268,   90,    236,
    986,   140,   1562,  2138,  108,   58,    2810,  591,   841,   837,
    841,   229,   581,   841,   837,   41,    73,    41,    73,    137,
    265,   133,   37,    229,   357,   841,   837,   73,    137,   265,
    233,   837,   73,    137,   169,   41,    233,   837,   841,   837,
    841,   837,   841,   837,   841,   837,   841,   837,   841,   901,
    809,   57,    805,   57,    197,   809,   57,    805,   57,    197,
    809,   57,    805,   57,    197,   809,   57,    805,   57,    197,
    809,   57,    805,   57,    197,   94,    1613,  135,   871,   71,
    39,    39,    327,   135,   39,    39,    39,    39,    39,    39,
    103,   71,    39,    39,    39,    39,    39,    39,    71,    39,
    135,   231,   135,   135,   39,    327,   551,   103,   167,   551,
    89,    1434,  3226,  506,   474,   506,   506,   367,   1018,  1946,
    1402,  954,   1402,  314,   90,    1082,  218,   2266,  666,   1210,
    186,   570,   2042,  58,    5850,  154,   2010,  154,   794,   2266,
    378,   2266,  3738,  39,    39,    39,    39,    39,    39,    17351,
    34,    3074,  7692,  63,    63,
  };
21815 
21816 static int sqlite3Fts5UnicodeCategory(u32 iCode) {
21817   int iRes = -1;
21818   int iHi;
21819   int iLo;
21820   int ret;
21821   u16 iKey;
21822 
21823   if( iCode>=(1<<20) ){
21824     return 0;
21825   }
21826   iLo = aFts5UnicodeBlock[(iCode>>16)];
21827   iHi = aFts5UnicodeBlock[1+(iCode>>16)];
21828   iKey = (iCode & 0xFFFF);
21829   while( iHi>iLo ){
21830     int iTest = (iHi + iLo) / 2;
21831     assert( iTest>=iLo && iTest<iHi );
21832     if( iKey>=aFts5UnicodeMap[iTest] ){
21833       iRes = iTest;
21834       iLo = iTest+1;
21835     }else{
21836       iHi = iTest;
21837     }
21838   }
21839 
21840   if( iRes<0 ) return 0;
21841   if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0;
21842   ret = aFts5UnicodeData[iRes] & 0x1F;
21843   if( ret!=30 ) return ret;
21844   return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 5 : 9;
21845 }
21846 
21847 static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){
21848   int i = 0;
21849   int iTbl = 0;
21850   while( i<128 ){
21851     int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ];
21852     int n = (aFts5UnicodeData[iTbl] >> 5) + i;
21853     for(; i<128 && i<n; i++){
21854       aAscii[i] = (u8)bToken;
21855     }
21856     iTbl++;
21857   }
21858   aAscii[0] = 0;                  /* 0x00 is never a token character */
21859 }
21860 
21861 
21862 #line 1 "fts5_varint.c"
21863 /*
21864 ** 2015 May 30
21865 **
21866 ** The author disclaims copyright to this source code.  In place of
21867 ** a legal notice, here is a blessing:
21868 **
21869 **    May you do good and not evil.
21870 **    May you find forgiveness for yourself and forgive others.
21871 **    May you share freely, never taking more than you give.
21872 **
21873 ******************************************************************************
21874 **
21875 ** Routines for varint serialization and deserialization.
21876 */
21877 
21878 
21879 /* #include "fts5Int.h" */
21880 
21881 /*
21882 ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core.
21883 ** Except, this version does handle the single byte case that the core
21884 ** version depends on being handled before its function is called.
21885 */
21886 static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){
21887   u32 a,b;
21888 
21889   /* The 1-byte case. Overwhelmingly the most common. */
21890   a = *p;
21891   /* a: p0 (unmasked) */
21892   if (!(a&0x80))
21893   {
21894     /* Values between 0 and 127 */
21895     *v = a;
21896     return 1;
21897   }
21898 
21899   /* The 2-byte case */
21900   p++;
21901   b = *p;
21902   /* b: p1 (unmasked) */
21903   if (!(b&0x80))
21904   {
21905     /* Values between 128 and 16383 */
21906     a &= 0x7f;
21907     a = a<<7;
21908     *v = a | b;
21909     return 2;
21910   }
21911 
21912   /* The 3-byte case */
21913   p++;
21914   a = a<<14;
21915   a |= *p;
21916   /* a: p0<<14 | p2 (unmasked) */
21917   if (!(a&0x80))
21918   {
21919     /* Values between 16384 and 2097151 */
21920     a &= (0x7f<<14)|(0x7f);
21921     b &= 0x7f;
21922     b = b<<7;
21923     *v = a | b;
21924     return 3;
21925   }
21926 
21927   /* A 32-bit varint is used to store size information in btrees.
21928   ** Objects are rarely larger than 2MiB limit of a 3-byte varint.
21929   ** A 3-byte varint is sufficient, for example, to record the size
21930   ** of a 1048569-byte BLOB or string.
21931   **
21932   ** We only unroll the first 1-, 2-, and 3- byte cases.  The very
21933   ** rare larger cases can be handled by the slower 64-bit varint
21934   ** routine.
21935   */
21936   {
21937     u64 v64;
21938     u8 n;
21939     p -= 2;
21940     n = sqlite3Fts5GetVarint(p, &v64);
21941     *v = ((u32)v64) & 0x7FFFFFFF;
21942     assert( n>3 && n<=9 );
21943     return n;
21944   }
21945 }
21946 
21947 
/*
** Bitmasks used by sqlite3Fts5GetVarint().  These precomputed constants
** are defined here rather than simply putting the constant expressions
** inline in order to work around bugs in the RVT compiler.
**
** SLOT_2_0     A mask for  (0x7f<<14) | 0x7f
**
** SLOT_4_2_0   A mask for  (0xfU<<28) | SLOT_2_0.  Only the low four bits
**              of the byte shifted into position 28 fit in a 32-bit value.
**
** sqlite3Fts5GetVarint() asserts that both values match these expressions.
*/
#define SLOT_2_0     0x001fc07f
#define SLOT_4_2_0   0xf01fc07f
21959 
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read (between 1 and 9).  The value is
** stored in *v.
**
** Decoding stops at the first byte whose 0x80 bit is clear, or after the
** ninth byte, whichever comes first.  The routine is fully unrolled: bytes
** are accumulated alternately into the 32-bit variables a and b, while s
** collects the bits that end up in the upper 32 bits of the result.  The
** "a:", "b:" and "s:" comments record what each variable holds at that
** point, where pN denotes byte N of the input.
*/
static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    *v = a;
    return 1;
  }

  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* we can skip these because they were (effectively) done above when
    ** calculating s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* we can skip this because it was (effectively) done above when
    ** calculating s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  /* Nine-byte case.  The final byte contributes all 8 of its bits, so the
  ** shifts below are by 15 and 8 rather than by 14 and 7. */
  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  /* p now points at p8, so p[-4] is p4.  The upper 4 of its 7 data bits
  ** are folded into the low end of s. */
  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}
22122 
22123 /*
22124 ** The variable-length integer encoding is as follows:
22125 **
22126 ** KEY:
22127 **         A = 0xxxxxxx    7 bits of data and one flag bit
22128 **         B = 1xxxxxxx    7 bits of data and one flag bit
22129 **         C = xxxxxxxx    8 bits of data
22130 **
22131 **  7 bits - A
22132 ** 14 bits - BA
22133 ** 21 bits - BBA
22134 ** 28 bits - BBBA
22135 ** 35 bits - BBBBA
22136 ** 42 bits - BBBBBA
22137 ** 49 bits - BBBBBBA
22138 ** 56 bits - BBBBBBBA
22139 ** 64 bits - BBBBBBBBC
22140 */
22141 
/*
** FTS5_NOINLINE expands to SQLITE_NOINLINE when that macro is defined and
** to nothing otherwise.  It decorates fts5PutVarint64() below.
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
22147 
22148 /*
22149 ** Write a 64-bit variable-length integer to memory starting at p[0].
22150 ** The length of data write will be between 1 and 9 bytes.  The number
22151 ** of bytes written is returned.
22152 **
22153 ** A variable-length integer consists of the lower 7 bits of each byte
22154 ** for all bytes that have the 8th bit set and one byte with the 8th
22155 ** bit clear.  Except, if we get to the 9th byte, it stores the full
22156 ** 8 bits and is the last byte.
22157 */
22158 static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){
22159   int i, j, n;
22160   u8 buf[10];
22161   if( v & (((u64)0xff000000)<<32) ){
22162     p[8] = (u8)v;
22163     v >>= 8;
22164     for(i=7; i>=0; i--){
22165       p[i] = (u8)((v & 0x7f) | 0x80);
22166       v >>= 7;
22167     }
22168     return 9;
22169   }
22170   n = 0;
22171   do{
22172     buf[n++] = (u8)((v & 0x7f) | 0x80);
22173     v >>= 7;
22174   }while( v!=0 );
22175   buf[0] &= 0x7f;
22176   assert( n<=9 );
22177   for(i=0, j=n-1; j>=0; j--, i++){
22178     p[i] = buf[j];
22179   }
22180   return n;
22181 }
22182 
22183 static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){
22184   if( v<=0x7f ){
22185     p[0] = v&0x7f;
22186     return 1;
22187   }
22188   if( v<=0x3fff ){
22189     p[0] = ((v>>7)&0x7f)|0x80;
22190     p[1] = v&0x7f;
22191     return 2;
22192   }
22193   return fts5PutVarint64(p,v);
22194 }
22195 
22196 
22197 static int sqlite3Fts5GetVarintLen(u32 iVal){
22198 #if 0
22199   if( iVal<(1 << 7 ) ) return 1;
22200 #endif
22201   assert( iVal>=(1 << 7) );
22202   if( iVal<(1 << 14) ) return 2;
22203   if( iVal<(1 << 21) ) return 3;
22204   if( iVal<(1 << 28) ) return 4;
22205   return 5;
22206 }
22207 
22208 #line 1 "fts5_vocab.c"
22209 /*
22210 ** 2015 May 08
22211 **
22212 ** The author disclaims copyright to this source code.  In place of
22213 ** a legal notice, here is a blessing:
22214 **
22215 **    May you do good and not evil.
22216 **    May you find forgiveness for yourself and forgive others.
22217 **    May you share freely, never taking more than you give.
22218 **
22219 ******************************************************************************
22220 **
22221 ** This is an SQLite virtual table module implementing direct access to an
22222 ** existing FTS5 index. The module may create several different types of
22223 ** tables:
22224 **
22225 ** col:
22226 **     CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col));
22227 **
22228 **   One row for each term/column combination. The value of $doc is set to
22229 **   the number of fts5 rows that contain at least one instance of term
22230 **   $term within column $col. Field $cnt is set to the total number of
22231 **   instances of term $term in column $col (in any row of the fts5 table).
22232 **
22233 ** row:
22234 **     CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term));
22235 **
22236 **   One row for each term in the database. The value of $doc is set to
22237 **   the number of fts5 rows that contain at least one instance of term
22238 **   $term. Field $cnt is set to the total number of instances of term
22239 **   $term in the database.
22240 **
22241 ** instance:
22242 **     CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>));
22243 **
22244 **   One row for each term instance in the database.
22245 */
22246 
22247 
22248 /* #include "fts5Int.h" */
22249 
22250 
typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

/*
** An fts5vocab virtual table object.  Instances are allocated by
** fts5VocabInitVtab(), which places the zFts5Tbl and zFts5Db strings in
** the same allocation, directly after the structure.  Freeing the
** structure therefore frees the strings as well.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;              /* Base class; vtab methods cast sqlite3_vtab* to this type */
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL, ROW or INSTANCE */
  unsigned bBusy;                 /* True if busy */
};
22263 
/*
** Cursor object for fts5vocab tables.  Some fields are only meaningful for
** particular table types, as noted below.
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Table *pFts5;               /* Associated FTS5 table */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */
  void *pStruct;                  /* From sqlite3Fts5StructureRef() */

  int nLeTerm;                    /* Size of zLeTerm in bytes */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */

  /* These are used by 'col' tables only */
  int iCol;                       /* NOTE(review): presumably the current column index - confirm */
  i64 *aCnt;                      /* NOTE(review): presumably per-column 'cnt' values - confirm */
  i64 *aDoc;                      /* NOTE(review): presumably per-column 'doc' values - confirm */

  /* Output values used by all tables. */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */

  /* Output values Used by 'instance' tables only */
  i64 iInstPos;                   /* NOTE(review): presumably the current position-list entry - confirm */
  int iInstOff;                   /* NOTE(review): presumably an offset into the position list - confirm */
};
22289 
/*
** Table types, as stored in Fts5VocabTable.eType.  fts5VocabInitVtab()
** also uses these values as indexes into its azSchema[] array, so the two
** must be kept in step.
*/
#define FTS5_VOCAB_COL      0
#define FTS5_VOCAB_ROW      1
#define FTS5_VOCAB_INSTANCE 2

/*
** Column lists for each table type.  fts5VocabInitVtab() splices these
** into the CREATE TABLE statements it passes to sqlite3_declare_vtab().
*/
#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"

/*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
*/
#define FTS5_VOCAB_TERM_EQ 0x01
#define FTS5_VOCAB_TERM_GE 0x02
#define FTS5_VOCAB_TERM_LE 0x04
22305 
22306 /*
22307 ** Translate a string containing an fts5vocab table type to an
22308 ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output
22309 ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message
22310 ** and return SQLITE_ERROR.
22311 */
22312 static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
22313   int rc = SQLITE_OK;
22314   char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1);
22315   if( rc==SQLITE_OK ){
22316     sqlite3Fts5Dequote(zCopy);
22317     if( sqlite3_stricmp(zCopy, "col")==0 ){
22318       *peType = FTS5_VOCAB_COL;
22319     }else
22320 
22321     if( sqlite3_stricmp(zCopy, "row")==0 ){
22322       *peType = FTS5_VOCAB_ROW;
22323     }else
22324     if( sqlite3_stricmp(zCopy, "instance")==0 ){
22325       *peType = FTS5_VOCAB_INSTANCE;
22326     }else
22327     {
22328       *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
22329       rc = SQLITE_ERROR;
22330     }
22331     sqlite3_free(zCopy);
22332   }
22333 
22334   return rc;
22335 }
22336 
22337 
22338 /*
22339 ** The xDisconnect() virtual table method.
22340 */
22341 static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){
22342   Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
22343   sqlite3_free(pTab);
22344   return SQLITE_OK;
22345 }
22346 
22347 /*
22348 ** The xDestroy() virtual table method.
22349 */
22350 static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){
22351   Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab;
22352   sqlite3_free(pTab);
22353   return SQLITE_OK;
22354 }
22355 
22356 /*
22357 ** This function is the implementation of both the xConnect and xCreate
22358 ** methods of the FTS3 virtual table.
22359 **
22360 ** The argv[] array contains the following:
22361 **
22362 **   argv[0]   -> module name  ("fts5vocab")
22363 **   argv[1]   -> database name
22364 **   argv[2]   -> table name
22365 **
22366 ** then:
22367 **
22368 **   argv[3]   -> name of fts5 table
22369 **   argv[4]   -> type of fts5vocab table
22370 **
22371 ** or, for tables in the TEMP schema only.
22372 **
22373 **   argv[3]   -> name of fts5 tables database
22374 **   argv[4]   -> name of fts5 table
22375 **   argv[5]   -> type of fts5vocab table
22376 */
22377 static int fts5VocabInitVtab(
22378   sqlite3 *db,                    /* The SQLite database connection */
22379   void *pAux,                     /* Pointer to Fts5Global object */
22380   int argc,                       /* Number of elements in argv array */
22381   const char * const *argv,       /* xCreate/xConnect argument array */
22382   sqlite3_vtab **ppVTab,          /* Write the resulting vtab structure here */
22383   char **pzErr                    /* Write any error message here */
22384 ){
22385   const char *azSchema[] = {
22386     "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA  ")",
22387     "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA  ")",
22388     "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")"
22389   };
22390 
22391   Fts5VocabTable *pRet = 0;
22392   int rc = SQLITE_OK;             /* Return code */
22393   int bDb;
22394 
22395   bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0);
22396 
22397   if( argc!=5 && bDb==0 ){
22398     *pzErr = sqlite3_mprintf("wrong number of vtable arguments");
22399     rc = SQLITE_ERROR;
22400   }else{
22401     int nByte;                      /* Bytes of space to allocate */
22402     const char *zDb = bDb ? argv[3] : argv[1];
22403     const char *zTab = bDb ? argv[4] : argv[3];
22404     const char *zType = bDb ? argv[5] : argv[4];
22405     int nDb = (int)strlen(zDb)+1;
22406     int nTab = (int)strlen(zTab)+1;
22407     int eType = 0;
22408 
22409     rc = fts5VocabTableType(zType, pzErr, &eType);
22410     if( rc==SQLITE_OK ){
22411       assert( eType>=0 && eType<ArraySize(azSchema) );
22412       rc = sqlite3_declare_vtab(db, azSchema[eType]);
22413     }
22414 
22415     nByte = sizeof(Fts5VocabTable) + nDb + nTab;
22416     pRet = sqlite3Fts5MallocZero(&rc, nByte);
22417     if( pRet ){
22418       pRet->pGlobal = (Fts5Global*)pAux;
22419       pRet->eType = eType;
22420       pRet->db = db;
22421       pRet->zFts5Tbl = (char*)&pRet[1];
22422       pRet->zFts5Db = &pRet->zFts5Tbl[nTab];
22423       memcpy(pRet->zFts5Tbl, zTab, nTab);
22424       memcpy(pRet->zFts5Db, zDb, nDb);
22425       sqlite3Fts5Dequote(pRet->zFts5Tbl);
22426       sqlite3Fts5Dequote(pRet->zFts5Db);
22427     }
22428   }
22429 
22430   *ppVTab = (sqlite3_vtab*)pRet;
22431   return rc;
22432 }
22433 
22434 
22435 /*
22436 ** The xConnect() and xCreate() methods for the virtual table. All the
22437 ** work is done in function fts5VocabInitVtab().
22438 */
22439 static int fts5VocabConnectMethod(
22440   sqlite3 *db,                    /* Database connection */
22441   void *pAux,                     /* Pointer to tokenizer hash table */
22442   int argc,                       /* Number of elements in argv array */
22443   const char * const *argv,       /* xCreate/xConnect argument array */
22444   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
22445   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
22446 ){
22447   return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
22448 }
22449 static int fts5VocabCreateMethod(
22450   sqlite3 *db,                    /* Database connection */
22451   void *pAux,                     /* Pointer to tokenizer hash table */
22452   int argc,                       /* Number of elements in argv array */
22453   const char * const *argv,       /* xCreate/xConnect argument array */
22454   sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
22455   char **pzErr                    /* OUT: sqlite3_malloc'd error message */
22456 ){
22457   return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
22458 }
22459 
22460 /*
22461 ** Implementation of the xBestIndex method.
22462 **
22463 ** Only constraints of the form:
22464 **
22465 **     term <= ?
22466 **     term == ?
22467 **     term >= ?
22468 **
22469 ** are interpreted. Less-than and less-than-or-equal are treated
22470 ** identically, as are greater-than and greater-than-or-equal.
22471 */
22472 static int fts5VocabBestIndexMethod(
22473   sqlite3_vtab *pUnused,
22474   sqlite3_index_info *pInfo
22475 ){
22476   int i;
22477   int iTermEq = -1;
22478   int iTermGe = -1;
22479   int iTermLe = -1;
22480   int idxNum = 0;
22481   int nArg = 0;
22482 
22483   UNUSED_PARAM(pUnused);
22484 
22485   for(i=0; i<pInfo->nConstraint; i++){
22486     struct sqlite3_index_constraint *p = &pInfo->aConstraint[i];
22487     if( p->usable==0 ) continue;
22488     if( p->iColumn==0 ){          /* term column */
22489       if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i;
22490       if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i;
22491       if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i;
22492       if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i;
22493       if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i;
22494     }
22495   }
22496 
22497   if( iTermEq>=0 ){
22498     idxNum |= FTS5_VOCAB_TERM_EQ;
22499     pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg;
22500     pInfo->estimatedCost = 100;
22501   }else{
22502     pInfo->estimatedCost = 1000000;
22503     if( iTermGe>=0 ){
22504       idxNum |= FTS5_VOCAB_TERM_GE;
22505       pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg;
22506       pInfo->estimatedCost = pInfo->estimatedCost / 2;
22507     }
22508     if( iTermLe>=0 ){
22509       idxNum |= FTS5_VOCAB_TERM_LE;
22510       pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg;
22511       pInfo->estimatedCost = pInfo->estimatedCost / 2;
22512     }
22513   }
22514 
22515   /* This virtual table always delivers results in ascending order of
22516   ** the "term" column (column 0). So if the user has requested this
22517   ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the
22518   ** sqlite3_index_info.orderByConsumed flag to tell the core the results
22519   ** are already in sorted order.  */
22520   if( pInfo->nOrderBy==1
22521    && pInfo->aOrderBy[0].iColumn==0
22522    && pInfo->aOrderBy[0].desc==0
22523   ){
22524     pInfo->orderByConsumed = 1;
22525   }
22526 
22527   pInfo->idxNum = idxNum;
22528   return SQLITE_OK;
22529 }
22530 
22531 /*
22532 ** Implementation of xOpen method.
22533 */
22534 static int fts5VocabOpenMethod(
22535   sqlite3_vtab *pVTab,
22536   sqlite3_vtab_cursor **ppCsr
22537 ){
22538   Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab;
22539   Fts5Table *pFts5 = 0;
22540   Fts5VocabCursor *pCsr = 0;
22541   int rc = SQLITE_OK;
22542   sqlite3_stmt *pStmt = 0;
22543   char *zSql = 0;
22544 
22545   if( pTab->bBusy ){
22546     pVTab->zErrMsg = sqlite3_mprintf(
22547        "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
22548     );
22549     return SQLITE_ERROR;
22550   }
22551   zSql = sqlite3Fts5Mprintf(&rc,
22552       "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'",
22553       pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl
22554   );
22555   if( zSql ){
22556     rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0);
22557   }
22558   sqlite3_free(zSql);
22559   assert( rc==SQLITE_OK || pStmt==0 );
22560   if( rc==SQLITE_ERROR ) rc = SQLITE_OK;
22561 
22562   pTab->bBusy = 1;
22563   if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){
22564     i64 iId = sqlite3_column_int64(pStmt, 0);
22565     pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId);
22566   }
22567   pTab->bBusy = 0;
22568 
22569   if( rc==SQLITE_OK ){
22570     if( pFts5==0 ){
22571       rc = sqlite3_finalize(pStmt);
22572       pStmt = 0;
22573       if( rc==SQLITE_OK ){
22574         pVTab->zErrMsg = sqlite3_mprintf(
22575             "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl
22576             );
22577         rc = SQLITE_ERROR;
22578       }
22579     }else{
22580       rc = sqlite3Fts5FlushToDisk(pFts5);
22581     }
22582   }
22583 
22584   if( rc==SQLITE_OK ){
22585     i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor);
22586     pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte);
22587   }
22588 
22589   if( pCsr ){
22590     pCsr->pFts5 = pFts5;
22591     pCsr->pStmt = pStmt;
22592     pCsr->aCnt = (i64*)&pCsr[1];
22593     pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol];
22594   }else{
22595     sqlite3_finalize(pStmt);
22596   }
22597 
22598   *ppCsr = (sqlite3_vtab_cursor*)pCsr;
22599   return rc;
22600 }
22601 
22602 static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){
22603   pCsr->rowid = 0;
22604   sqlite3Fts5IterClose(pCsr->pIter);
22605   sqlite3Fts5StructureRelease(pCsr->pStruct);
22606   pCsr->pStruct = 0;
22607   pCsr->pIter = 0;
22608   sqlite3_free(pCsr->zLeTerm);
22609   pCsr->nLeTerm = -1;
22610   pCsr->zLeTerm = 0;
22611   pCsr->bEof = 0;
22612 }
22613 
22614 /*
22615 ** Close the cursor.  For additional information see the documentation
22616 ** on the xClose method of the virtual table interface.
22617 */
22618 static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
22619   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22620   fts5VocabResetCursor(pCsr);
22621   sqlite3Fts5BufferFree(&pCsr->term);
22622   sqlite3_finalize(pCsr->pStmt);
22623   sqlite3_free(pCsr);
22624   return SQLITE_OK;
22625 }
22626 
22627 static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){
22628   int rc = SQLITE_OK;
22629 
22630   if( sqlite3Fts5IterEof(pCsr->pIter) ){
22631     pCsr->bEof = 1;
22632   }else{
22633     const char *zTerm;
22634     int nTerm;
22635     zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
22636     if( pCsr->nLeTerm>=0 ){
22637       int nCmp = MIN(nTerm, pCsr->nLeTerm);
22638       int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
22639       if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
22640         pCsr->bEof = 1;
22641       }
22642     }
22643 
22644     sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
22645   }
22646   return rc;
22647 }
22648 
22649 static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){
22650   int eDetail = pCsr->pFts5->pConfig->eDetail;
22651   int rc = SQLITE_OK;
22652   Fts5IndexIter *pIter = pCsr->pIter;
22653   i64 *pp = &pCsr->iInstPos;
22654   int *po = &pCsr->iInstOff;
22655 
22656   assert( sqlite3Fts5IterEof(pIter)==0 );
22657   assert( pCsr->bEof==0 );
22658   while( eDetail==FTS5_DETAIL_NONE
22659       || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp)
22660   ){
22661     pCsr->iInstPos = 0;
22662     pCsr->iInstOff = 0;
22663 
22664     rc = sqlite3Fts5IterNextScan(pCsr->pIter);
22665     if( rc==SQLITE_OK ){
22666       rc = fts5VocabInstanceNewTerm(pCsr);
22667       if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE ) break;
22668     }
22669     if( rc ){
22670       pCsr->bEof = 1;
22671       break;
22672     }
22673   }
22674 
22675   return rc;
22676 }
22677 
22678 /*
22679 ** Advance the cursor to the next row in the table.
22680 */
22681 static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
22682   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22683   Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
22684   int nCol = pCsr->pFts5->pConfig->nCol;
22685   int rc;
22686 
22687   rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct);
22688   if( rc!=SQLITE_OK ) return rc;
22689   pCsr->rowid++;
22690 
22691   if( pTab->eType==FTS5_VOCAB_INSTANCE ){
22692     return fts5VocabInstanceNext(pCsr);
22693   }
22694 
22695   if( pTab->eType==FTS5_VOCAB_COL ){
22696     for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
22697       if( pCsr->aDoc[pCsr->iCol] ) break;
22698     }
22699   }
22700 
22701   if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){
22702     if( sqlite3Fts5IterEof(pCsr->pIter) ){
22703       pCsr->bEof = 1;
22704     }else{
22705       const char *zTerm;
22706       int nTerm;
22707 
22708       zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
22709       assert( nTerm>=0 );
22710       if( pCsr->nLeTerm>=0 ){
22711         int nCmp = MIN(nTerm, pCsr->nLeTerm);
22712         int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp);
22713         if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){
22714           pCsr->bEof = 1;
22715           return SQLITE_OK;
22716         }
22717       }
22718 
22719       sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm);
22720       memset(pCsr->aCnt, 0, nCol * sizeof(i64));
22721       memset(pCsr->aDoc, 0, nCol * sizeof(i64));
22722       pCsr->iCol = 0;
22723 
22724       assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
22725       while( rc==SQLITE_OK ){
22726         int eDetail = pCsr->pFts5->pConfig->eDetail;
22727         const u8 *pPos; int nPos;   /* Position list */
22728         i64 iPos = 0;               /* 64-bit position read from poslist */
22729         int iOff = 0;               /* Current offset within position list */
22730 
22731         pPos = pCsr->pIter->pData;
22732         nPos = pCsr->pIter->nData;
22733 
22734         switch( pTab->eType ){
22735           case FTS5_VOCAB_ROW:
22736             if( eDetail==FTS5_DETAIL_FULL ){
22737               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
22738                 pCsr->aCnt[0]++;
22739               }
22740             }
22741             pCsr->aDoc[0]++;
22742             break;
22743 
22744           case FTS5_VOCAB_COL:
22745             if( eDetail==FTS5_DETAIL_FULL ){
22746               int iCol = -1;
22747               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
22748                 int ii = FTS5_POS2COLUMN(iPos);
22749                 if( iCol!=ii ){
22750                   if( ii>=nCol ){
22751                     rc = FTS5_CORRUPT;
22752                     break;
22753                   }
22754                   pCsr->aDoc[ii]++;
22755                   iCol = ii;
22756                 }
22757                 pCsr->aCnt[ii]++;
22758               }
22759             }else if( eDetail==FTS5_DETAIL_COLUMNS ){
22760               while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
22761                 assert_nc( iPos>=0 && iPos<nCol );
22762                 if( iPos>=nCol ){
22763                   rc = FTS5_CORRUPT;
22764                   break;
22765                 }
22766                 pCsr->aDoc[iPos]++;
22767               }
22768             }else{
22769               assert( eDetail==FTS5_DETAIL_NONE );
22770               pCsr->aDoc[0]++;
22771             }
22772             break;
22773 
22774           default:
22775             assert( pTab->eType==FTS5_VOCAB_INSTANCE );
22776             break;
22777         }
22778 
22779         if( rc==SQLITE_OK ){
22780           rc = sqlite3Fts5IterNextScan(pCsr->pIter);
22781         }
22782         if( pTab->eType==FTS5_VOCAB_INSTANCE ) break;
22783 
22784         if( rc==SQLITE_OK ){
22785           zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
22786           if( nTerm!=pCsr->term.n
22787           || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm))
22788           ){
22789             break;
22790           }
22791           if( sqlite3Fts5IterEof(pCsr->pIter) ) break;
22792         }
22793       }
22794     }
22795   }
22796 
22797   if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){
22798     for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++);
22799     if( pCsr->iCol==nCol ){
22800       rc = FTS5_CORRUPT;
22801     }
22802   }
22803   return rc;
22804 }
22805 
22806 /*
22807 ** This is the xFilter implementation for the virtual table.
22808 */
22809 static int fts5VocabFilterMethod(
22810   sqlite3_vtab_cursor *pCursor,   /* The cursor used for this query */
22811   int idxNum,                     /* Strategy index */
22812   const char *zUnused,            /* Unused */
22813   int nUnused,                    /* Number of elements in apVal */
22814   sqlite3_value **apVal           /* Arguments for the indexing scheme */
22815 ){
22816   Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
22817   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22818   int eType = pTab->eType;
22819   int rc = SQLITE_OK;
22820 
22821   int iVal = 0;
22822   int f = FTS5INDEX_QUERY_SCAN;
22823   const char *zTerm = 0;
22824   int nTerm = 0;
22825 
22826   sqlite3_value *pEq = 0;
22827   sqlite3_value *pGe = 0;
22828   sqlite3_value *pLe = 0;
22829 
22830   UNUSED_PARAM2(zUnused, nUnused);
22831 
22832   fts5VocabResetCursor(pCsr);
22833   if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++];
22834   if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++];
22835   if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++];
22836 
22837   if( pEq ){
22838     zTerm = (const char *)sqlite3_value_text(pEq);
22839     nTerm = sqlite3_value_bytes(pEq);
22840     f = 0;
22841   }else{
22842     if( pGe ){
22843       zTerm = (const char *)sqlite3_value_text(pGe);
22844       nTerm = sqlite3_value_bytes(pGe);
22845     }
22846     if( pLe ){
22847       const char *zCopy = (const char *)sqlite3_value_text(pLe);
22848       if( zCopy==0 ) zCopy = "";
22849       pCsr->nLeTerm = sqlite3_value_bytes(pLe);
22850       pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1);
22851       if( pCsr->zLeTerm==0 ){
22852         rc = SQLITE_NOMEM;
22853       }else{
22854         memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1);
22855       }
22856     }
22857   }
22858 
22859   if( rc==SQLITE_OK ){
22860     Fts5Index *pIndex = pCsr->pFts5->pIndex;
22861     rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
22862     if( rc==SQLITE_OK ){
22863       pCsr->pStruct = sqlite3Fts5StructureRef(pIndex);
22864     }
22865   }
22866   if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){
22867     rc = fts5VocabInstanceNewTerm(pCsr);
22868   }
22869   if( rc==SQLITE_OK && !pCsr->bEof
22870    && (eType!=FTS5_VOCAB_INSTANCE
22871     || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE)
22872   ){
22873     rc = fts5VocabNextMethod(pCursor);
22874   }
22875 
22876   return rc;
22877 }
22878 
22879 /*
22880 ** This is the xEof method of the virtual table. SQLite calls this
22881 ** routine to find out if it has reached the end of a result set.
22882 */
22883 static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){
22884   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22885   return pCsr->bEof;
22886 }
22887 
22888 static int fts5VocabColumnMethod(
22889   sqlite3_vtab_cursor *pCursor,   /* Cursor to retrieve value from */
22890   sqlite3_context *pCtx,          /* Context for sqlite3_result_xxx() calls */
22891   int iCol                        /* Index of column to read value from */
22892 ){
22893   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22894   int eDetail = pCsr->pFts5->pConfig->eDetail;
22895   int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType;
22896   i64 iVal = 0;
22897 
22898   if( iCol==0 ){
22899     sqlite3_result_text(
22900         pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT
22901     );
22902   }else if( eType==FTS5_VOCAB_COL ){
22903     assert( iCol==1 || iCol==2 || iCol==3 );
22904     if( iCol==1 ){
22905       if( eDetail!=FTS5_DETAIL_NONE ){
22906         const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol];
22907         sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
22908       }
22909     }else if( iCol==2 ){
22910       iVal = pCsr->aDoc[pCsr->iCol];
22911     }else{
22912       iVal = pCsr->aCnt[pCsr->iCol];
22913     }
22914   }else if( eType==FTS5_VOCAB_ROW ){
22915     assert( iCol==1 || iCol==2 );
22916     if( iCol==1 ){
22917       iVal = pCsr->aDoc[0];
22918     }else{
22919       iVal = pCsr->aCnt[0];
22920     }
22921   }else{
22922     assert( eType==FTS5_VOCAB_INSTANCE );
22923     switch( iCol ){
22924       case 1:
22925         sqlite3_result_int64(pCtx, pCsr->pIter->iRowid);
22926         break;
22927       case 2: {
22928         int ii = -1;
22929         if( eDetail==FTS5_DETAIL_FULL ){
22930           ii = FTS5_POS2COLUMN(pCsr->iInstPos);
22931         }else if( eDetail==FTS5_DETAIL_COLUMNS ){
22932           ii = (int)pCsr->iInstPos;
22933         }
22934         if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){
22935           const char *z = pCsr->pFts5->pConfig->azCol[ii];
22936           sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
22937         }
22938         break;
22939       }
22940       default: {
22941         assert( iCol==3 );
22942         if( eDetail==FTS5_DETAIL_FULL ){
22943           int ii = FTS5_POS2OFFSET(pCsr->iInstPos);
22944           sqlite3_result_int(pCtx, ii);
22945         }
22946         break;
22947       }
22948     }
22949   }
22950 
22951   if( iVal>0 ) sqlite3_result_int64(pCtx, iVal);
22952   return SQLITE_OK;
22953 }
22954 
22955 /*
22956 ** This is the xRowid method. The SQLite core calls this routine to
22957 ** retrieve the rowid for the current row of the result set. The
22958 ** rowid should be written to *pRowid.
22959 */
22960 static int fts5VocabRowidMethod(
22961   sqlite3_vtab_cursor *pCursor,
22962   sqlite_int64 *pRowid
22963 ){
22964   Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
22965   *pRowid = pCsr->rowid;
22966   return SQLITE_OK;
22967 }
22968 
22969 static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){
22970   static const sqlite3_module fts5Vocab = {
22971     /* iVersion      */ 2,
22972     /* xCreate       */ fts5VocabCreateMethod,
22973     /* xConnect      */ fts5VocabConnectMethod,
22974     /* xBestIndex    */ fts5VocabBestIndexMethod,
22975     /* xDisconnect   */ fts5VocabDisconnectMethod,
22976     /* xDestroy      */ fts5VocabDestroyMethod,
22977     /* xOpen         */ fts5VocabOpenMethod,
22978     /* xClose        */ fts5VocabCloseMethod,
22979     /* xFilter       */ fts5VocabFilterMethod,
22980     /* xNext         */ fts5VocabNextMethod,
22981     /* xEof          */ fts5VocabEofMethod,
22982     /* xColumn       */ fts5VocabColumnMethod,
22983     /* xRowid        */ fts5VocabRowidMethod,
22984     /* xUpdate       */ 0,
22985     /* xBegin        */ 0,
22986     /* xSync         */ 0,
22987     /* xCommit       */ 0,
22988     /* xRollback     */ 0,
22989     /* xFindFunction */ 0,
22990     /* xRename       */ 0,
22991     /* xSavepoint    */ 0,
22992     /* xRelease      */ 0,
22993     /* xRollbackTo   */ 0,
22994     /* xShadowName   */ 0
22995   };
22996   void *p = (void*)pGlobal;
22997 
22998   return sqlite3_create_module_v2(db, "fts5vocab", &fts5Vocab, p, 0);
22999 }
23000 
23001 
23002 
23003 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */
23004