1 2 3 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) 4 5 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 6 # define NDEBUG 1 7 #endif 8 #if defined(NDEBUG) && defined(SQLITE_DEBUG) 9 # undef NDEBUG 10 #endif 11 12 #line 1 "fts5.h" 13 /* 14 ** 2014 May 31 15 ** 16 ** The author disclaims copyright to this source code. In place of 17 ** a legal notice, here is a blessing: 18 ** 19 ** May you do good and not evil. 20 ** May you find forgiveness for yourself and forgive others. 21 ** May you share freely, never taking more than you give. 22 ** 23 ****************************************************************************** 24 ** 25 ** Interfaces to extend FTS5. Using the interfaces defined in this file, 26 ** FTS5 may be extended with: 27 ** 28 ** * custom tokenizers, and 29 ** * custom auxiliary functions. 30 */ 31 32 33 #ifndef _FTS5_H 34 #define _FTS5_H 35 36 #include "sqlite3.h" 37 38 #ifdef __cplusplus 39 extern "C" { 40 #endif 41 42 /************************************************************************* 43 ** CUSTOM AUXILIARY FUNCTIONS 44 ** 45 ** Virtual table implementations may overload SQL functions by implementing 46 ** the sqlite3_module.xFindFunction() method. 47 */ 48 49 typedef struct Fts5ExtensionApi Fts5ExtensionApi; 50 typedef struct Fts5Context Fts5Context; 51 typedef struct Fts5PhraseIter Fts5PhraseIter; 52 53 typedef void (*fts5_extension_function)( 54 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 55 Fts5Context *pFts, /* First arg to pass to pApi functions */ 56 sqlite3_context *pCtx, /* Context for returning result/error */ 57 int nVal, /* Number of values in apVal[] array */ 58 sqlite3_value **apVal /* Array of trailing arguments */ 59 ); 60 61 struct Fts5PhraseIter { 62 const unsigned char *a; 63 const unsigned char *b; 64 }; 65 66 /* 67 ** EXTENSION API FUNCTIONS 68 ** 69 ** xUserData(pFts): 70 ** Return a copy of the context pointer the extension function was 71 ** registered with. 
**
** xColumnTotalSize(pFts, iCol, pnToken):
**   If parameter iCol is less than zero, set output variable *pnToken
**   to the total number of tokens in the FTS5 table. Or, if iCol is
**   non-negative but less than the number of columns in the table, set
**   *pnToken to the total number of tokens in column iCol, considering
**   all rows in the FTS5 table.
**
**   If parameter iCol is greater than or equal to the number of columns
**   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
**   an OOM condition or IO error), an appropriate SQLite error code is
**   returned.
**
** xColumnCount(pFts):
**   Return the number of columns in the table.
**
** xColumnSize(pFts, iCol, pnToken):
**   If parameter iCol is less than zero, set output variable *pnToken
**   to the total number of tokens in the current row. Or, if iCol is
**   non-negative but less than the number of columns in the table, set
**   *pnToken to the number of tokens in column iCol of the current row.
**
**   If parameter iCol is greater than or equal to the number of columns
**   in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
**   an OOM condition or IO error), an appropriate SQLite error code is
**   returned.
**
**   This function may be quite inefficient if used with an FTS5 table
**   created with the "columnsize=0" option.
**
** xColumnText:
**   This function attempts to retrieve the text of column iCol of the
**   current document. If successful, (*pz) is set to point to a buffer
**   containing the text in utf-8 encoding, (*pn) is set to the size in bytes
**   (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
**   if an error occurs, an SQLite error code is returned and the final values
**   of (*pz) and (*pn) are undefined.
**
** xPhraseCount:
**   Returns the number of phrases in the current query expression.
**
** xPhraseSize:
**   Returns the number of tokens in phrase iPhrase of the query. Phrases
**   are numbered starting from zero.
**
** xInstCount:
**   Set *pnInst to the total number of occurrences of all phrases within
**   the query within the current row. Return SQLITE_OK if successful, or
**   an error code (e.g. SQLITE_NOMEM) if an error occurs.
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" or "detail=column" option. If the FTS5 table is created
**   with either "detail=none" or "detail=column" and the "content=" option
**   (i.e. if it is a contentless table), then this API always returns 0.
**
** xInst:
**   Query for the details of phrase match iIdx within the current row.
**   Phrase matches are numbered starting from zero, so the iIdx argument
**   should be greater than or equal to zero and smaller than the value
**   output by xInstCount().
**
**   Usually, output parameter *piPhrase is set to the phrase number, *piCol
**   to the column in which it occurs and *piOff to the token offset of the
**   first token of the phrase. Returns SQLITE_OK if successful, or an error
**   code (e.g. SQLITE_NOMEM) if an error occurs.
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" or "detail=column" option.
**
** xRowid:
**   Returns the rowid of the current row.
**
** xTokenize:
**   Tokenize text using the tokenizer belonging to the FTS5 table.
**
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
**   This API function is used to query the FTS table for phrase iPhrase
**   of the current query. Specifically, a query equivalent to:
**
**       ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
**
**   with $p set to a phrase equivalent to the phrase iPhrase of the
**   current query is executed.
Any column filter that applies to 155 ** phrase iPhrase of the current query is included in $p. For each 156 ** row visited, the callback function passed as the fourth argument 157 ** is invoked. The context and API objects passed to the callback 158 ** function may be used to access the properties of each matched row. 159 ** Invoking Api.xUserData() returns a copy of the pointer passed as 160 ** the third argument to pUserData. 161 ** 162 ** If the callback function returns any value other than SQLITE_OK, the 163 ** query is abandoned and the xQueryPhrase function returns immediately. 164 ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. 165 ** Otherwise, the error code is propagated upwards. 166 ** 167 ** If the query runs to completion without incident, SQLITE_OK is returned. 168 ** Or, if some error occurs before the query completes or is aborted by 169 ** the callback, an SQLite error code is returned. 170 ** 171 ** 172 ** xSetAuxdata(pFts5, pAux, xDelete) 173 ** 174 ** Save the pointer passed as the second argument as the extension function's 175 ** "auxiliary data". The pointer may then be retrieved by the current or any 176 ** future invocation of the same fts5 extension function made as part of 177 ** the same MATCH query using the xGetAuxdata() API. 178 ** 179 ** Each extension function is allocated a single auxiliary data slot for 180 ** each FTS query (MATCH expression). If the extension function is invoked 181 ** more than once for a single FTS query, then all invocations share a 182 ** single auxiliary data context. 183 ** 184 ** If there is already an auxiliary data pointer when this function is 185 ** invoked, then it is replaced by the new pointer. If an xDelete callback 186 ** was specified along with the original pointer, it is invoked at this 187 ** point. 188 ** 189 ** The xDelete callback, if one is specified, is also invoked on the 190 ** auxiliary data pointer after the FTS5 query has finished. 
191 ** 192 ** If an error (e.g. an OOM condition) occurs within this function, 193 ** the auxiliary data is set to NULL and an error code returned. If the 194 ** xDelete parameter was not NULL, it is invoked on the auxiliary data 195 ** pointer before returning. 196 ** 197 ** 198 ** xGetAuxdata(pFts5, bClear) 199 ** 200 ** Returns the current auxiliary data pointer for the fts5 extension 201 ** function. See the xSetAuxdata() method for details. 202 ** 203 ** If the bClear argument is non-zero, then the auxiliary data is cleared 204 ** (set to NULL) before this function returns. In this case the xDelete, 205 ** if any, is not invoked. 206 ** 207 ** 208 ** xRowCount(pFts5, pnRow) 209 ** 210 ** This function is used to retrieve the total number of rows in the table. 211 ** In other words, the same value that would be returned by: 212 ** 213 ** SELECT count(*) FROM ftstable; 214 ** 215 ** xPhraseFirst() 216 ** This function is used, along with type Fts5PhraseIter and the xPhraseNext 217 ** method, to iterate through all instances of a single query phrase within 218 ** the current row. This is the same information as is accessible via the 219 ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient 220 ** to use, this API may be faster under some circumstances. To iterate 221 ** through instances of phrase iPhrase, use the following code: 222 ** 223 ** Fts5PhraseIter iter; 224 ** int iCol, iOff; 225 ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff); 226 ** iCol>=0; 227 ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff) 228 ** ){ 229 ** // An instance of phrase iPhrase at offset iOff of column iCol 230 ** } 231 ** 232 ** The Fts5PhraseIter structure is defined above. Applications should not 233 ** modify this structure directly - it should only be used as shown above 234 ** with the xPhraseFirst() and xPhraseNext() API methods (and by 235 ** xPhraseFirstColumn() and xPhraseNextColumn() as illustrated below). 
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" or "detail=column" option. If the FTS5 table is created
**   with either "detail=none" or "detail=column" and the "content=" option
**   (i.e. if it is a contentless table), then this API always iterates
**   through an empty set (all calls to xPhraseFirst() set iCol to -1).
**
** xPhraseNext()
**   See xPhraseFirst above.
**
** xPhraseFirstColumn()
**   This function and xPhraseNextColumn() are similar to the xPhraseFirst()
**   and xPhraseNext() APIs described above. The difference is that instead
**   of iterating through all instances of a phrase in the current row, these
**   APIs are used to iterate through the set of columns in the current row
**   that contain one or more instances of a specified phrase. For example:
**
**       Fts5PhraseIter iter;
**       int iCol;
**       for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
**           iCol>=0;
**           pApi->xPhraseNextColumn(pFts, &iter, &iCol)
**       ){
**         // Column iCol contains at least one instance of phrase iPhrase
**       }
**
**   This API can be quite slow if used with an FTS5 table created with the
**   "detail=none" option. If the FTS5 table is created with both the
**   "detail=none" and "content=" options (i.e. if it is a contentless
**   table), then this API always iterates through an empty set (all calls
**   to xPhraseFirstColumn() set iCol to -1).
**
**   The information accessed using this API and its companion
**   xPhraseNextColumn() may also be obtained using xPhraseFirst/xPhraseNext
**   (or xInst/xInstCount). The chief advantage of this API is that it is
**   significantly more efficient than those alternatives when used with
**   "detail=column" tables.
**
** xPhraseNextColumn()
**   See xPhraseFirstColumn above.
276 */ 277 struct Fts5ExtensionApi { 278 int iVersion; /* Currently always set to 3 */ 279 280 void *(*xUserData)(Fts5Context*); 281 282 int (*xColumnCount)(Fts5Context*); 283 int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); 284 int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); 285 286 int (*xTokenize)(Fts5Context*, 287 const char *pText, int nText, /* Text to tokenize */ 288 void *pCtx, /* Context passed to xToken() */ 289 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ 290 ); 291 292 int (*xPhraseCount)(Fts5Context*); 293 int (*xPhraseSize)(Fts5Context*, int iPhrase); 294 295 int (*xInstCount)(Fts5Context*, int *pnInst); 296 int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff); 297 298 sqlite3_int64 (*xRowid)(Fts5Context*); 299 int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); 300 int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); 301 302 int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, 303 int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) 304 ); 305 int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); 306 void *(*xGetAuxdata)(Fts5Context*, int bClear); 307 308 int (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*); 309 void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff); 310 311 int (*xPhraseFirstColumn)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*); 312 void (*xPhraseNextColumn)(Fts5Context*, Fts5PhraseIter*, int *piCol); 313 }; 314 315 /* 316 ** CUSTOM AUXILIARY FUNCTIONS 317 *************************************************************************/ 318 319 /************************************************************************* 320 ** CUSTOM TOKENIZERS 321 ** 322 ** Applications may also register custom tokenizer types. A tokenizer 323 ** is registered by providing fts5 with a populated instance of the 324 ** following structure. 
All structure methods must be defined, setting 325 ** any member of the fts5_tokenizer struct to NULL leads to undefined 326 ** behaviour. The structure methods are expected to function as follows: 327 ** 328 ** xCreate: 329 ** This function is used to allocate and initialize a tokenizer instance. 330 ** A tokenizer instance is required to actually tokenize text. 331 ** 332 ** The first argument passed to this function is a copy of the (void*) 333 ** pointer provided by the application when the fts5_tokenizer object 334 ** was registered with FTS5 (the third argument to xCreateTokenizer()). 335 ** The second and third arguments are an array of nul-terminated strings 336 ** containing the tokenizer arguments, if any, specified following the 337 ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used 338 ** to create the FTS5 table. 339 ** 340 ** The final argument is an output variable. If successful, (*ppOut) 341 ** should be set to point to the new tokenizer handle and SQLITE_OK 342 ** returned. If an error occurs, some value other than SQLITE_OK should 343 ** be returned. In this case, fts5 assumes that the final value of *ppOut 344 ** is undefined. 345 ** 346 ** xDelete: 347 ** This function is invoked to delete a tokenizer handle previously 348 ** allocated using xCreate(). Fts5 guarantees that this function will 349 ** be invoked exactly once for each successful call to xCreate(). 350 ** 351 ** xTokenize: 352 ** This function is expected to tokenize the nText byte string indicated 353 ** by argument pText. pText may or may not be nul-terminated. The first 354 ** argument passed to this function is a pointer to an Fts5Tokenizer object 355 ** returned by an earlier call to xCreate(). 356 ** 357 ** The second argument indicates the reason that FTS5 is requesting 358 ** tokenization of the supplied text. 
**   This is always one of the following
**   four values:
**
**   <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
**            or removed from the FTS table. The tokenizer is being invoked to
**            determine the set of tokens to add to (or delete from) the
**            FTS index.
**
**       <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
**            against the FTS index. The tokenizer is being called to tokenize
**            a bareword or quoted string specified as part of the query.
**
**       <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
**            FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
**            followed by a "*" character, indicating that the last token
**            returned by the tokenizer will be treated as a token prefix.
**
**       <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
**            satisfy an Fts5ExtensionApi.xTokenize() request made by an
**            auxiliary function. Or an Fts5ExtensionApi.xColumnSize() request
**            made by the same on a columnsize=0 database.
**   </ul>
**
**   For each token in the input string, the supplied callback xToken() must
**   be invoked. The first argument to it should be a copy of the pointer
**   passed as the second argument to xTokenize(). The third and fourth
**   arguments are a pointer to a buffer containing the token text, and the
**   size of the token in bytes. The fifth and sixth arguments are the byte
**   offsets of the first byte of and first byte immediately following the
**   text from which the token is derived within the input.
**
**   The second argument passed to the xToken() callback ("tflags") should
**   normally be set to 0. The exception is if the tokenizer supports
**   synonyms. In this case see the discussion below for details.
**
**   FTS5 assumes the xToken() callback is invoked for each token in the
**   order that they occur within the input text.
395 ** 396 ** If an xToken() callback returns any value other than SQLITE_OK, then 397 ** the tokenization should be abandoned and the xTokenize() method should 398 ** immediately return a copy of the xToken() return value. Or, if the 399 ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally, 400 ** if an error occurs with the xTokenize() implementation itself, it 401 ** may abandon the tokenization and return any error code other than 402 ** SQLITE_OK or SQLITE_DONE. 403 ** 404 ** SYNONYM SUPPORT 405 ** 406 ** Custom tokenizers may also support synonyms. Consider a case in which a 407 ** user wishes to query for a phrase such as "first place". Using the 408 ** built-in tokenizers, the FTS5 query 'first + place' will match instances 409 ** of "first place" within the document set, but not alternative forms 410 ** such as "1st place". In some applications, it would be better to match 411 ** all instances of "first place" or "1st place" regardless of which form 412 ** the user specified in the MATCH query text. 413 ** 414 ** There are several ways to approach this in FTS5: 415 ** 416 ** <ol><li> By mapping all synonyms to a single token. In this case, using 417 ** the above example, this means that the tokenizer returns the 418 ** same token for inputs "first" and "1st". Say that token is in 419 ** fact "first", so that when the user inserts the document "I won 420 ** 1st place" entries are added to the index for tokens "i", "won", 421 ** "first" and "place". If the user then queries for '1st + place', 422 ** the tokenizer substitutes "first" for "1st" and the query works 423 ** as expected. 424 ** 425 ** <li> By querying the index for all synonyms of each query term 426 ** separately. In this case, when tokenizing query text, the 427 ** tokenizer may provide multiple synonyms for a single term 428 ** within the document. FTS5 then queries the index for each 429 ** synonym individually. 
For example, faced with the query: 430 ** 431 ** <codeblock> 432 ** ... MATCH 'first place'</codeblock> 433 ** 434 ** the tokenizer offers both "1st" and "first" as synonyms for the 435 ** first token in the MATCH query and FTS5 effectively runs a query 436 ** similar to: 437 ** 438 ** <codeblock> 439 ** ... MATCH '(first OR 1st) place'</codeblock> 440 ** 441 ** except that, for the purposes of auxiliary functions, the query 442 ** still appears to contain just two phrases - "(first OR 1st)" 443 ** being treated as a single phrase. 444 ** 445 ** <li> By adding multiple synonyms for a single term to the FTS index. 446 ** Using this method, when tokenizing document text, the tokenizer 447 ** provides multiple synonyms for each token. So that when a 448 ** document such as "I won first place" is tokenized, entries are 449 ** added to the FTS index for "i", "won", "first", "1st" and 450 ** "place". 451 ** 452 ** This way, even if the tokenizer does not provide synonyms 453 ** when tokenizing query text (it should not - to do so would be 454 ** inefficient), it doesn't matter if the user queries for 455 ** 'first + place' or '1st + place', as there are entries in the 456 ** FTS index corresponding to both forms of the first token. 457 ** </ol> 458 ** 459 ** Whether it is parsing document or query text, any call to xToken that 460 ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit 461 ** is considered to supply a synonym for the previous token. 
For example, 462 ** when parsing the document "I won first place", a tokenizer that supports 463 ** synonyms would call xToken() 5 times, as follows: 464 ** 465 ** <codeblock> 466 ** xToken(pCtx, 0, "i", 1, 0, 1); 467 ** xToken(pCtx, 0, "won", 3, 2, 5); 468 ** xToken(pCtx, 0, "first", 5, 6, 11); 469 ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11); 470 ** xToken(pCtx, 0, "place", 5, 12, 17); 471 **</codeblock> 472 ** 473 ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time 474 ** xToken() is called. Multiple synonyms may be specified for a single token 475 ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence. 476 ** There is no limit to the number of synonyms that may be provided for a 477 ** single token. 478 ** 479 ** In many cases, method (1) above is the best approach. It does not add 480 ** extra data to the FTS index or require FTS5 to query for multiple terms, 481 ** so it is efficient in terms of disk space and query speed. However, it 482 ** does not support prefix queries very well. If, as suggested above, the 483 ** token "first" is substituted for "1st" by the tokenizer, then the query: 484 ** 485 ** <codeblock> 486 ** ... MATCH '1s*'</codeblock> 487 ** 488 ** will not match documents that contain the token "1st" (as the tokenizer 489 ** will probably not map "1s" to any prefix of "first"). 490 ** 491 ** For full prefix support, method (3) may be preferred. In this case, 492 ** because the index contains entries for both "first" and "1st", prefix 493 ** queries such as 'fi*' or '1s*' will match correctly. However, because 494 ** extra entries are added to the FTS index, this method uses more space 495 ** within the database. 496 ** 497 ** Method (2) offers a midpoint between (1) and (3). Using this method, 498 ** a query such as '1s*' will match documents that contain the literal 499 ** token "1st", but not "first" (assuming the tokenizer is not able to 500 ** provide synonyms for prefixes). 
However, a non-prefix query like '1st' 501 ** will match against "1st" and "first". This method does not require 502 ** extra disk space, as no extra entries are added to the FTS index. 503 ** On the other hand, it may require more CPU cycles to run MATCH queries, 504 ** as separate queries of the FTS index are required for each synonym. 505 ** 506 ** When using methods (2) or (3), it is important that the tokenizer only 507 ** provide synonyms when tokenizing document text (method (2)) or query 508 ** text (method (3)), not both. Doing so will not cause any errors, but is 509 ** inefficient. 510 */ 511 typedef struct Fts5Tokenizer Fts5Tokenizer; 512 typedef struct fts5_tokenizer fts5_tokenizer; 513 struct fts5_tokenizer { 514 int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut); 515 void (*xDelete)(Fts5Tokenizer*); 516 int (*xTokenize)(Fts5Tokenizer*, 517 void *pCtx, 518 int flags, /* Mask of FTS5_TOKENIZE_* flags */ 519 const char *pText, int nText, 520 int (*xToken)( 521 void *pCtx, /* Copy of 2nd argument to xTokenize() */ 522 int tflags, /* Mask of FTS5_TOKEN_* flags */ 523 const char *pToken, /* Pointer to buffer containing token */ 524 int nToken, /* Size of token in bytes */ 525 int iStart, /* Byte offset of token within input text */ 526 int iEnd /* Byte offset of end of token within input text */ 527 ) 528 ); 529 }; 530 531 /* Flags that may be passed as the third argument to xTokenize() */ 532 #define FTS5_TOKENIZE_QUERY 0x0001 533 #define FTS5_TOKENIZE_PREFIX 0x0002 534 #define FTS5_TOKENIZE_DOCUMENT 0x0004 535 #define FTS5_TOKENIZE_AUX 0x0008 536 537 /* Flags that may be passed by the tokenizer implementation back to FTS5 538 ** as the third argument to the supplied xToken callback. */ 539 #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. 
token */ 540 541 /* 542 ** END OF CUSTOM TOKENIZERS 543 *************************************************************************/ 544 545 /************************************************************************* 546 ** FTS5 EXTENSION REGISTRATION API 547 */ 548 typedef struct fts5_api fts5_api; 549 struct fts5_api { 550 int iVersion; /* Currently always set to 2 */ 551 552 /* Create a new tokenizer */ 553 int (*xCreateTokenizer)( 554 fts5_api *pApi, 555 const char *zName, 556 void *pContext, 557 fts5_tokenizer *pTokenizer, 558 void (*xDestroy)(void*) 559 ); 560 561 /* Find an existing tokenizer */ 562 int (*xFindTokenizer)( 563 fts5_api *pApi, 564 const char *zName, 565 void **ppContext, 566 fts5_tokenizer *pTokenizer 567 ); 568 569 /* Create a new auxiliary function */ 570 int (*xCreateFunction)( 571 fts5_api *pApi, 572 const char *zName, 573 void *pContext, 574 fts5_extension_function xFunction, 575 void (*xDestroy)(void*) 576 ); 577 }; 578 579 /* 580 ** END OF REGISTRATION API 581 *************************************************************************/ 582 583 #ifdef __cplusplus 584 } /* end of the 'extern "C"' block */ 585 #endif 586 587 #endif /* _FTS5_H */ 588 589 #line 1 "fts5Int.h" 590 /* 591 ** 2014 May 31 592 ** 593 ** The author disclaims copyright to this source code. In place of 594 ** a legal notice, here is a blessing: 595 ** 596 ** May you do good and not evil. 597 ** May you find forgiveness for yourself and forgive others. 598 ** May you share freely, never taking more than you give. 
599 ** 600 ****************************************************************************** 601 ** 602 */ 603 #ifndef _FTS5INT_H 604 #define _FTS5INT_H 605 606 /* #include "fts5.h" */ 607 #include "sqlite3ext.h" 608 SQLITE_EXTENSION_INIT1 609 610 #include <string.h> 611 #include <assert.h> 612 613 #ifndef SQLITE_AMALGAMATION 614 615 typedef unsigned char u8; 616 typedef unsigned int u32; 617 typedef unsigned short u16; 618 typedef short i16; 619 typedef sqlite3_int64 i64; 620 typedef sqlite3_uint64 u64; 621 622 #ifndef ArraySize 623 # define ArraySize(x) ((int)(sizeof(x) / sizeof(x[0]))) 624 #endif 625 626 #define testcase(x) 627 628 #if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST) 629 # define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1 630 #endif 631 #if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS) 632 # define ALWAYS(X) (1) 633 # define NEVER(X) (0) 634 #elif !defined(NDEBUG) 635 # define ALWAYS(X) ((X)?1:(assert(0),0)) 636 # define NEVER(X) ((X)?(assert(0),1):0) 637 #else 638 # define ALWAYS(X) (X) 639 # define NEVER(X) (X) 640 #endif 641 642 #define MIN(x,y) (((x) < (y)) ? (x) : (y)) 643 #define MAX(x,y) (((x) > (y)) ? (x) : (y)) 644 645 /* 646 ** Constants for the largest and smallest possible 64-bit signed integers. 647 */ 648 # define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) 649 # define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) 650 651 #endif 652 653 /* Truncate very long tokens to this many bytes. Hard limit is 654 ** (65536-1-1-4-9)==65521 bytes. The limiting factor is the 16-bit offset 655 ** field that occurs at the start of each leaf page (see fts5_index.c). */ 656 #define FTS5_MAX_TOKEN_SIZE 32768 657 658 /* 659 ** Maximum number of prefix indexes on single FTS5 table. This must be 660 ** less than 32. If it is set to anything large than that, an #error 661 ** directive in fts5_index.c will cause the build to fail. 
662 */ 663 #define FTS5_MAX_PREFIX_INDEXES 31 664 665 /* 666 ** Maximum segments permitted in a single index 667 */ 668 #define FTS5_MAX_SEGMENT 2000 669 670 #define FTS5_DEFAULT_NEARDIST 10 671 #define FTS5_DEFAULT_RANK "bm25" 672 673 /* Name of rank and rowid columns */ 674 #define FTS5_RANK_NAME "rank" 675 #define FTS5_ROWID_NAME "rowid" 676 677 #ifdef SQLITE_DEBUG 678 # define FTS5_CORRUPT sqlite3Fts5Corrupt() 679 static int sqlite3Fts5Corrupt(void); 680 #else 681 # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB 682 #endif 683 684 /* 685 ** The assert_nc() macro is similar to the assert() macro, except that it 686 ** is used for assert() conditions that are true only if it can be 687 ** guranteed that the database is not corrupt. 688 */ 689 #ifdef SQLITE_DEBUG 690 extern int sqlite3_fts5_may_be_corrupt; 691 # define assert_nc(x) assert(sqlite3_fts5_may_be_corrupt || (x)) 692 #else 693 # define assert_nc(x) assert(x) 694 #endif 695 696 /* 697 ** A version of memcmp() that does not cause asan errors if one of the pointer 698 ** parameters is NULL and the number of bytes to compare is zero. 699 */ 700 #define fts5Memcmp(s1, s2, n) ((n)<=0 ? 0 : memcmp((s1), (s2), (n))) 701 702 /* Mark a function parameter as unused, to suppress nuisance compiler 703 ** warnings. */ 704 #ifndef UNUSED_PARAM 705 # define UNUSED_PARAM(X) (void)(X) 706 #endif 707 708 #ifndef UNUSED_PARAM2 709 # define UNUSED_PARAM2(X, Y) (void)(X), (void)(Y) 710 #endif 711 712 typedef struct Fts5Global Fts5Global; 713 typedef struct Fts5Colset Fts5Colset; 714 715 /* If a NEAR() clump or phrase may only match a specific set of columns, 716 ** then an object of the following type is used to record the set of columns. 717 ** Each entry in the aiCol[] array is a column that may be matched. 718 ** 719 ** This object is used by fts5_expr.c and fts5_index.c. 
720 */ 721 struct Fts5Colset { 722 int nCol; 723 int aiCol[1]; 724 }; 725 726 727 728 /************************************************************************** 729 ** Interface to code in fts5_config.c. fts5_config.c contains code 730 ** to parse the arguments passed to the CREATE VIRTUAL TABLE statement. 731 */ 732 733 typedef struct Fts5Config Fts5Config; 734 735 /* 736 ** An instance of the following structure encodes all information that can 737 ** be gleaned from the CREATE VIRTUAL TABLE statement. 738 ** 739 ** And all information loaded from the %_config table. 740 ** 741 ** nAutomerge: 742 ** The minimum number of segments that an auto-merge operation should 743 ** attempt to merge together. A value of 1 sets the object to use the 744 ** compile time default. Zero disables auto-merge altogether. 745 ** 746 ** zContent: The content table, per the zContent field comment below. 747 ** 748 ** zContentRowid: 749 ** The value of the content_rowid= option, if one was specified. Or 750 ** the string "rowid" otherwise. This text is not quoted - if it is 751 ** used as part of an SQL statement it needs to be quoted appropriately. 752 ** 753 ** zContentExprlist: NOTE(review): not documented here - confirm in fts5_config.c. 754 ** 755 ** pzErrmsg: 756 ** This exists in order to allow the fts5_index.c module to return a 757 ** decent error message if it encounters a file-format version it does 758 ** not understand. 759 ** 760 ** bColumnsize: 761 ** True if the %_docsize table is created. 762 ** 763 ** bPrefixIndex: 764 ** This is only used for debugging. If set to false, any prefix indexes 765 ** are ignored. This value is configured using: 766 ** 767 ** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); 768 ** 769 */ 770 struct Fts5Config { 771 sqlite3 *db; /* Database handle */ 772 char *zDb; /* Database holding FTS index (e.g.
"main") */ 773 char *zName; /* Name of FTS index */ 774 int nCol; /* Number of columns */ 775 char **azCol; /* Column names */ 776 u8 *abUnindexed; /* True for unindexed columns */ 777 int nPrefix; /* Number of prefix indexes */ 778 int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ 779 int eContent; /* An FTS5_CONTENT value */ 780 char *zContent; /* content table */ 781 char *zContentRowid; /* "content_rowid=" option value */ 782 int bColumnsize; /* "columnsize=" option value (dflt==1) */ 783 int eDetail; /* FTS5_DETAIL_XXX value */ 784 char *zContentExprlist; /* NOTE(review): undocumented - see fts5_config.c */ 785 Fts5Tokenizer *pTok; /* Tokenizer object - presumably created via pTokApi (TODO confirm) */ 786 fts5_tokenizer *pTokApi; /* Tokenizer API - presumably the methods used with pTok (TODO confirm) */ 787 int bLock; /* True when table is preparing statement */ 788 int ePattern; /* FTS5_PATTERN_XXX constant */ 789 790 /* Values loaded from the %_config table */ 791 int iCookie; /* Incremented when %_config is modified */ 792 int pgsz; /* Approximate page size used in %_data */ 793 int nAutomerge; /* 'automerge' setting */ 794 int nCrisisMerge; /* Maximum allowed segments per level */ 795 int nUsermerge; /* 'usermerge' setting */ 796 int nHashSize; /* Bytes of memory for in-memory hash */ 797 char *zRank; /* Name of rank function */ 798 char *zRankArgs; /* Arguments to rank function */ 799 800 /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL.
*/ 801 char **pzErrmsg; 802 803 #ifdef SQLITE_DEBUG 804 int bPrefixIndex; /* True to use prefix-indexes */ 805 #endif 806 }; 807 808 /* Current expected value of %_config table 'version' field */ 809 #define FTS5_CURRENT_VERSION 4 810 /* Values for Fts5Config.eContent */ 811 #define FTS5_CONTENT_NORMAL 0 812 #define FTS5_CONTENT_NONE 1 813 #define FTS5_CONTENT_EXTERNAL 2 814 /* Values for Fts5Config.eDetail */ 815 #define FTS5_DETAIL_FULL 0 816 #define FTS5_DETAIL_NONE 1 817 #define FTS5_DETAIL_COLUMNS 2 818 /* Values for Fts5Config.ePattern */ 819 #define FTS5_PATTERN_NONE 0 820 #define FTS5_PATTERN_LIKE 65 /* matches SQLITE_INDEX_CONSTRAINT_LIKE */ 821 #define FTS5_PATTERN_GLOB 66 /* matches SQLITE_INDEX_CONSTRAINT_GLOB */ 822 823 static int sqlite3Fts5ConfigParse( 824 Fts5Global*, sqlite3*, int, const char **, Fts5Config**, char** 825 ); 826 static void sqlite3Fts5ConfigFree(Fts5Config*); 827 828 static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig); 829 830 static int sqlite3Fts5Tokenize( 831 Fts5Config *pConfig, /* FTS5 Configuration object */ 832 int flags, /* FTS5_TOKENIZE_* flags */ 833 const char *pText, int nText, /* Text to tokenize */ 834 void *pCtx, /* Context passed to xToken() */ 835 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ 836 ); 837 838 static void sqlite3Fts5Dequote(char *z); 839 840 /* Load the contents of the %_config table */ 841 static int sqlite3Fts5ConfigLoad(Fts5Config*, int); 842 843 /* Set the value of a single config attribute */ 844 static int sqlite3Fts5ConfigSetValue(Fts5Config*, const char*, sqlite3_value*, int*); 845 846 static int sqlite3Fts5ConfigParseRank(const char*, char**, char**); 847 848 /* 849 ** End of interface to code in fts5_config.c. 850 **************************************************************************/ 851 852 /************************************************************************** 853 ** Interface to code in fts5_buffer.c. 854 */ 855 856 /* 857 ** Buffer object for the incremental building of string data.
858 */ 859 typedef struct Fts5Buffer Fts5Buffer; 860 struct Fts5Buffer { 861 u8 *p; /* Buffer data */ 862 int n; /* Bytes in use - compared against nSpace by fts5BufferGrow() */ 863 int nSpace; /* Bytes of space available - see fts5BufferGrow() */ 864 }; 865 866 static int sqlite3Fts5BufferSize(int*, Fts5Buffer*, u32); 867 static void sqlite3Fts5BufferAppendVarint(int*, Fts5Buffer*, i64); 868 static void sqlite3Fts5BufferAppendBlob(int*, Fts5Buffer*, u32, const u8*); 869 static void sqlite3Fts5BufferAppendString(int *, Fts5Buffer*, const char*); 870 static void sqlite3Fts5BufferFree(Fts5Buffer*); 871 static void sqlite3Fts5BufferZero(Fts5Buffer*); 872 static void sqlite3Fts5BufferSet(int*, Fts5Buffer*, int, const u8*); 873 static void sqlite3Fts5BufferAppendPrintf(int *, Fts5Buffer*, char *zFmt, ...); 874 875 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...); 876 /* NOTE(review): "c" is not parenthesized in fts5BufferAppendVarint(), so for a compound argument the (i64) cast binds to its first operand only. Left as is, because parenthesizing it could change overflow behavior at existing call sites. */ 877 #define fts5BufferZero(x) sqlite3Fts5BufferZero(x) 878 #define fts5BufferAppendVarint(a,b,c) sqlite3Fts5BufferAppendVarint(a,b,(i64)c) 879 #define fts5BufferFree(a) sqlite3Fts5BufferFree(a) 880 #define fts5BufferAppendBlob(a,b,c,d) sqlite3Fts5BufferAppendBlob(a,b,c,d) 881 #define fts5BufferSet(a,b,c,d) sqlite3Fts5BufferSet(a,b,c,d) 882 /* Evaluates to 0 if pBuf->n plus nn does not exceed pBuf->nSpace; otherwise to the value returned by sqlite3Fts5BufferSize(). pBuf and nn are evaluated more than once. */ 883 #define fts5BufferGrow(pRc,pBuf,nn) ( \ 884 (u32)((pBuf)->n) + (u32)(nn) <= (u32)((pBuf)->nSpace) ? 0 : \ 885 sqlite3Fts5BufferSize((pRc),(pBuf),(nn)+(pBuf)->n) \ 886 ) 887 888 /* Write and decode big-endian 32-bit integer values */ 889 static void sqlite3Fts5Put32(u8*, int); 890 static int sqlite3Fts5Get32(const u8*); 891 /* Split a 64-bit position value: FTS5_POS2COLUMN() yields bits 32 and up, FTS5_POS2OFFSET() yields the low 31 bits. Both the argument and the whole expansion are parenthesized so that compound expressions can be passed and the result used inside larger expressions. */ 892 #define FTS5_POS2COLUMN(iPos) ((int)((iPos) >> 32)) 893 #define FTS5_POS2OFFSET(iPos) ((int)((iPos) & 0x7FFFFFFF)) 894 895 typedef struct Fts5PoslistReader Fts5PoslistReader; 896 struct Fts5PoslistReader { 897 /* Variables used only by sqlite3Fts5PoslistReaderXXX() functions.
*/ 898 const u8 *a; /* Position list to iterate through */ 899 int n; /* Size of buffer at a[] in bytes */ 900 int i; /* Current offset in a[] */ 901 902 u8 bFlag; /* For client use (any custom purpose) */ 903 904 /* Output variables */ 905 u8 bEof; /* Set to true at EOF */ 906 i64 iPos; /* (iCol<<32) + iPos; see FTS5_POS2COLUMN/FTS5_POS2OFFSET */ 907 }; 908 static int sqlite3Fts5PoslistReaderInit( 909 const u8 *a, int n, /* Poslist buffer to iterate through */ 910 Fts5PoslistReader *pIter /* Iterator object to initialize */ 911 ); 912 static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader*); 913 914 typedef struct Fts5PoslistWriter Fts5PoslistWriter; 915 struct Fts5PoslistWriter { 916 i64 iPrev; 917 }; 918 static int sqlite3Fts5PoslistWriterAppend(Fts5Buffer*, Fts5PoslistWriter*, i64); 919 static void sqlite3Fts5PoslistSafeAppend(Fts5Buffer*, i64*, i64); 920 921 static int sqlite3Fts5PoslistNext64( 922 const u8 *a, int n, /* Buffer containing poslist */ 923 int *pi, /* IN/OUT: Offset within a[] */ 924 i64 *piOff /* IN/OUT: Current offset */ 925 ); 926 927 /* Malloc utility */ 928 static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte); 929 static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn); 930 931 /* Character set tests (like isspace(), isalpha() etc.) */ 932 static int sqlite3Fts5IsBareword(char t); 933 934 935 /* Bucket of terms object used by the integrity-check in offsets=0 mode. */ 936 typedef struct Fts5Termset Fts5Termset; 937 static int sqlite3Fts5TermsetNew(Fts5Termset**); 938 static int sqlite3Fts5TermsetAdd(Fts5Termset*, int, const char*, int, int *pbPresent); 939 static void sqlite3Fts5TermsetFree(Fts5Termset*); 940 941 /* 942 ** End of interface to code in fts5_buffer.c. 943 **************************************************************************/ 944 945 /************************************************************************** 946 ** Interface to code in fts5_index.c.
fts5_index.c contains code 947 ** to access the data stored in the %_data table. 948 */ 949 950 typedef struct Fts5Index Fts5Index; 951 typedef struct Fts5IndexIter Fts5IndexIter; 952 953 struct Fts5IndexIter { 954 i64 iRowid; 955 const u8 *pData; 956 int nData; 957 u8 bEof; /* Read via the sqlite3Fts5IterEof() macro */ 958 }; 959 960 #define sqlite3Fts5IterEof(x) ((x)->bEof) 961 962 /* 963 ** Values used as part of the flags argument passed to IndexQuery(). 964 */ 965 #define FTS5INDEX_QUERY_PREFIX 0x0001 /* Prefix query */ 966 #define FTS5INDEX_QUERY_DESC 0x0002 /* Docs in descending rowid order */ 967 #define FTS5INDEX_QUERY_TEST_NOIDX 0x0004 /* Do not use prefix index */ 968 #define FTS5INDEX_QUERY_SCAN 0x0008 /* Scan query (fts5vocab) */ 969 970 /* The following are used internally by the fts5_index.c module. They are 971 ** defined here only to make it easier to avoid clashes with the flags 972 ** above. */ 973 #define FTS5INDEX_QUERY_SKIPEMPTY 0x0010 974 #define FTS5INDEX_QUERY_NOOUTPUT 0x0020 975 976 /* 977 ** Create/destroy an Fts5Index object. 978 */ 979 static int sqlite3Fts5IndexOpen(Fts5Config *pConfig, int bCreate, Fts5Index**, char**); 980 static int sqlite3Fts5IndexClose(Fts5Index *p); 981 982 /* 983 ** Return a simple checksum value based on the arguments. 984 */ 985 static u64 sqlite3Fts5IndexEntryCksum( 986 i64 iRowid, 987 int iCol, 988 int iPos, 989 int iIdx, 990 const char *pTerm, 991 int nTerm 992 ); 993 994 /* 995 ** Argument p points to a buffer containing utf-8 text that is nByte bytes in 996 ** size. Return the number of bytes in the nChar character prefix of the 997 ** buffer, or 0 if there are fewer than nChar characters in total. 998 */ 999 static int sqlite3Fts5IndexCharlenToBytelen( 1000 const char *p, 1001 int nByte, 1002 int nChar 1003 ); 1004 1005 /* 1006 ** Open a new iterator to iterate through all rowids that match the 1007 ** specified token or token prefix.
1008 */ 1009 static int sqlite3Fts5IndexQuery( 1010 Fts5Index *p, /* FTS index to query */ 1011 const char *pToken, int nToken, /* Token (or prefix) to query for */ 1012 int flags, /* Mask of FTS5INDEX_QUERY_X flags */ 1013 Fts5Colset *pColset, /* Match these columns only */ 1014 Fts5IndexIter **ppIter /* OUT: New iterator object */ 1015 ); 1016 1017 /* 1018 ** The various operations on open token or token prefix iterators opened 1019 ** using sqlite3Fts5IndexQuery(). 1020 */ 1021 static int sqlite3Fts5IterNext(Fts5IndexIter*); 1022 static int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); 1023 1024 /* 1025 ** Close an iterator opened by sqlite3Fts5IndexQuery(). 1026 */ 1027 static void sqlite3Fts5IterClose(Fts5IndexIter*); 1028 1029 /* 1030 ** Close the reader blob handle, if it is open. 1031 */ 1032 static void sqlite3Fts5IndexCloseReader(Fts5Index*); 1033 1034 /* 1035 ** This interface is used by the fts5vocab module. sqlite3Fts5StructureRef() returns an untyped pointer of the kind that sqlite3Fts5StructureRelease() and sqlite3Fts5StructureTest() accept (presumably the same object - TODO confirm in fts5_index.c). 1036 */ 1037 static const char *sqlite3Fts5IterTerm(Fts5IndexIter*, int*); 1038 static int sqlite3Fts5IterNextScan(Fts5IndexIter*); 1039 static void *sqlite3Fts5StructureRef(Fts5Index*); 1040 static void sqlite3Fts5StructureRelease(void*); 1041 static int sqlite3Fts5StructureTest(Fts5Index*, void*); 1042 1043 1044 /* 1045 ** Insert or remove data to or from the index. Each time a document is 1046 ** added to or removed from the index, this function is called one or more 1047 ** times. 1048 ** 1049 ** For an insert, it must be called once for each token in the new document. 1050 ** If the operation is a delete, it must be called (at least) once for each 1051 ** unique token in the document with an iCol value less than zero. The iPos 1052 ** argument is ignored for a delete.
1053 */ 1054 static int sqlite3Fts5IndexWrite( 1055 Fts5Index *p, /* Index to write to */ 1056 int iCol, /* Column token appears in (-ve -> delete) */ 1057 int iPos, /* Position of token within column */ 1058 const char *pToken, int nToken /* Token to add or remove to or from index */ 1059 ); 1060 1061 /* 1062 ** Indicate that subsequent calls to sqlite3Fts5IndexWrite() pertain to 1063 ** document iDocid. 1064 */ 1065 static int sqlite3Fts5IndexBeginWrite( 1066 Fts5Index *p, /* Index to write to */ 1067 int bDelete, /* True if current operation is a delete */ 1068 i64 iDocid /* Docid to add or remove data from */ 1069 ); 1070 1071 /* 1072 ** Flush any data stored in the in-memory hash tables to the database. 1073 ** Also close any open blob handles. 1074 */ 1075 static int sqlite3Fts5IndexSync(Fts5Index *p); 1076 1077 /* 1078 ** Discard any data stored in the in-memory hash tables. Do not write it 1079 ** to the database. Additionally, assume that the contents of the %_data 1080 ** table may have changed on disk. So any in-memory caches of %_data 1081 ** records must be invalidated. 1082 */ 1083 static int sqlite3Fts5IndexRollback(Fts5Index *p); 1084 1085 /* 1086 ** Get or set the "averages" values. 1087 */ 1088 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize); 1089 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8*, int); 1090 1091 /* 1092 ** Function called by the storage module as part of integrity-check. 1093 */ 1094 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index*, u64 cksum, int bUseCksum); 1095 1096 /* 1097 ** Called during virtual module initialization to register UDF 1098 ** fts5_decode() with SQLite. 1099 */ 1100 static int sqlite3Fts5IndexInit(sqlite3*); 1101 /* NOTE(review): presumably stores a new configuration cookie value (cf. Fts5Config.iCookie) - confirm in fts5_index.c */ 1102 static int sqlite3Fts5IndexSetCookie(Fts5Index*, int); 1103 1104 /* 1105 ** Return the total number of entries read from the %_data table by 1106 ** this connection since it was created.
1107 */ 1108 static int sqlite3Fts5IndexReads(Fts5Index *p); 1109 1110 static int sqlite3Fts5IndexReinit(Fts5Index *p); 1111 static int sqlite3Fts5IndexOptimize(Fts5Index *p); 1112 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge); 1113 static int sqlite3Fts5IndexReset(Fts5Index *p); 1114 1115 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p); 1116 1117 /* 1118 ** End of interface to code in fts5_index.c. 1119 **************************************************************************/ 1120 1121 /************************************************************************** 1122 ** Interface to code in fts5_varint.c. 1123 */ 1124 static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v); 1125 static int sqlite3Fts5GetVarintLen(u32 iVal); 1126 static u8 sqlite3Fts5GetVarint(const unsigned char*, u64*); 1127 static int sqlite3Fts5PutVarint(unsigned char *p, u64 v); 1128 /* fts5GetVarint32() takes the address of its second argument and casts it to (u32*), so b must be an lvalue. */ 1129 #define fts5GetVarint32(a,b) sqlite3Fts5GetVarint32(a,(u32*)&b) 1130 #define fts5GetVarint sqlite3Fts5GetVarint 1131 /* Read a[iOff] into nVal and increment iOff. If the byte read has its 0x80 bit set, iOff is stepped back and fts5GetVarint32() decodes the value instead, with iOff advanced by that call's return value. Expands to a bare {...} block and evaluates iOff and nVal more than once, so pass only simple lvalues for them. */ 1132 #define fts5FastGetVarint32(a, iOff, nVal) { \ 1133 nVal = (a)[iOff++]; \ 1134 if( nVal & 0x80 ){ \ 1135 iOff--; \ 1136 iOff += fts5GetVarint32(&(a)[iOff], nVal); \ 1137 } \ 1138 } 1139 1140 1141 /* 1142 ** End of interface to code in fts5_varint.c. 1143 **************************************************************************/ 1144 1145 1146 /************************************************************************** 1147 ** Interface to code in fts5_main.c. 1148 */ 1149 1150 /* 1151 ** Virtual-table object.
1152 */ 1153 typedef struct Fts5Table Fts5Table; 1154 struct Fts5Table { 1155 sqlite3_vtab base; /* Base class used by SQLite core */ 1156 Fts5Config *pConfig; /* Virtual table configuration */ 1157 Fts5Index *pIndex; /* Full-text index */ 1158 }; 1159 1160 static int sqlite3Fts5GetTokenizer( 1161 Fts5Global*, 1162 const char **azArg, 1163 int nArg, 1164 Fts5Config*, 1165 char **pzErr 1166 ); 1167 1168 static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); 1169 1170 static int sqlite3Fts5FlushToDisk(Fts5Table*); 1171 1172 /* 1173 ** End of interface to code in fts5_main.c. 1174 **************************************************************************/ 1175 1176 /************************************************************************** 1177 ** Interface to code in fts5_hash.c. 1178 */ 1179 typedef struct Fts5Hash Fts5Hash; 1180 1181 /* 1182 ** Create a hash table, free a hash table. 1183 */ 1184 static int sqlite3Fts5HashNew(Fts5Config*, Fts5Hash**, int *pnSize); 1185 static void sqlite3Fts5HashFree(Fts5Hash*); 1186 1187 static int sqlite3Fts5HashWrite( 1188 Fts5Hash*, 1189 i64 iRowid, /* Rowid for this entry */ 1190 int iCol, /* Column token appears in (-ve -> delete) */ 1191 int iPos, /* Position of token within column */ 1192 char bByte, /* NOTE(review): undocumented - confirm in fts5_hash.c */ 1193 const char *pToken, int nToken /* Token to add or remove to or from index */ 1194 ); 1195 1196 /* 1197 ** Empty (but do not delete) a hash table.
1198 */ 1199 static void sqlite3Fts5HashClear(Fts5Hash*); 1200 1201 static int sqlite3Fts5HashQuery( 1202 Fts5Hash*, /* Hash table to query */ 1203 int nPre, /* NOTE(review): undocumented - confirm in fts5_hash.c */ 1204 const char *pTerm, int nTerm, /* Query term */ 1205 void **ppObj, /* OUT: Pointer to doclist for pTerm */ 1206 int *pnDoclist /* OUT: Size of doclist in bytes */ 1207 ); 1208 1209 static int sqlite3Fts5HashScanInit( 1210 Fts5Hash*, /* Hash table to query */ 1211 const char *pTerm, int nTerm /* Query prefix */ 1212 ); 1213 static void sqlite3Fts5HashScanNext(Fts5Hash*); 1214 static int sqlite3Fts5HashScanEof(Fts5Hash*); 1215 static void sqlite3Fts5HashScanEntry(Fts5Hash *, 1216 const char **pzTerm, /* OUT: term (nul-terminated) */ 1217 const u8 **ppDoclist, /* OUT: pointer to doclist */ 1218 int *pnDoclist /* OUT: size of doclist in bytes */ 1219 ); 1220 1221 1222 /* 1223 ** End of interface to code in fts5_hash.c. 1224 **************************************************************************/ 1225 1226 /************************************************************************** 1227 ** Interface to code in fts5_storage.c. fts5_storage.c contains 1228 ** code to access the data stored in the %_content and %_docsize tables. 1229 */ 1230 1231 #define FTS5_STMT_SCAN_ASC 0 /* SELECT rowid, * FROM ... ORDER BY 1 ASC */ 1232 #define FTS5_STMT_SCAN_DESC 1 /* SELECT rowid, * FROM ... ORDER BY 1 DESC */ 1233 #define FTS5_STMT_LOOKUP 2 /* SELECT rowid, * FROM ... WHERE rowid=?
*/ 1234 1235 typedef struct Fts5Storage Fts5Storage; 1236 1237 static int sqlite3Fts5StorageOpen(Fts5Config*, Fts5Index*, int, Fts5Storage**, char**); 1238 static int sqlite3Fts5StorageClose(Fts5Storage *p); 1239 static int sqlite3Fts5StorageRename(Fts5Storage*, const char *zName); 1240 1241 static int sqlite3Fts5DropAll(Fts5Config*); 1242 static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); 1243 1244 static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**); 1245 static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*); 1246 static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); 1247 1248 static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg); 1249 /* NOTE(review): eStmt is presumably one of the FTS5_STMT_XXX values defined above - confirm in fts5_storage.c */ 1250 static int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt**, char**); 1251 static void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); 1252 1253 static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); 1254 static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); 1255 static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); 1256 1257 static int sqlite3Fts5StorageSync(Fts5Storage *p); 1258 static int sqlite3Fts5StorageRollback(Fts5Storage *p); 1259 1260 static int sqlite3Fts5StorageConfigValue( 1261 Fts5Storage *p, const char*, sqlite3_value*, int 1262 ); 1263 1264 static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p); 1265 static int sqlite3Fts5StorageRebuild(Fts5Storage *p); 1266 static int sqlite3Fts5StorageOptimize(Fts5Storage *p); 1267 static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge); 1268 static int sqlite3Fts5StorageReset(Fts5Storage *p); 1269 1270 /* 1271 ** End of interface to code in fts5_storage.c.
1272 **************************************************************************/ 1273 1274 1275 /************************************************************************** 1276 ** Interface to code in fts5_expr.c. 1277 */ 1278 typedef struct Fts5Expr Fts5Expr; 1279 typedef struct Fts5ExprNode Fts5ExprNode; 1280 typedef struct Fts5Parse Fts5Parse; 1281 typedef struct Fts5Token Fts5Token; 1282 typedef struct Fts5ExprPhrase Fts5ExprPhrase; 1283 typedef struct Fts5ExprNearset Fts5ExprNearset; 1284 1285 struct Fts5Token { 1286 const char *p; /* Token text (not nul-terminated) */ 1287 int n; /* Size of buffer p in bytes */ 1288 }; 1289 1290 /* Parse a MATCH expression. */ 1291 static int sqlite3Fts5ExprNew( 1292 Fts5Config *pConfig, 1293 int bPhraseToAnd, 1294 int iCol, /* Column on LHS of MATCH operator */ 1295 const char *zExpr, 1296 Fts5Expr **ppNew, 1297 char **pzErr 1298 ); 1299 static int sqlite3Fts5ExprPattern( 1300 Fts5Config *pConfig, 1301 int bGlob, 1302 int iCol, 1303 const char *zText, 1304 Fts5Expr **pp 1305 ); 1306 1307 /* Usage sketch for the iteration functions declared below: 1308 ** for(rc = sqlite3Fts5ExprFirst(pExpr, pIdx, iMin, bDesc); 1309 ** rc==SQLITE_OK && 0==sqlite3Fts5ExprEof(pExpr); 1310 ** rc = sqlite3Fts5ExprNext(pExpr, iMax) 1311 ** ){ 1312 ** // The document with rowid iRowid matches the expression!
1313 ** i64 iRowid = sqlite3Fts5ExprRowid(pExpr); 1314 ** } 1315 */ 1316 static int sqlite3Fts5ExprFirst(Fts5Expr*, Fts5Index *pIdx, i64 iMin, int bDesc); 1317 static int sqlite3Fts5ExprNext(Fts5Expr*, i64 iMax); 1318 static int sqlite3Fts5ExprEof(Fts5Expr*); 1319 static i64 sqlite3Fts5ExprRowid(Fts5Expr*); 1320 1321 static void sqlite3Fts5ExprFree(Fts5Expr*); 1322 static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2); 1323 1324 /* Called during startup to register a UDF with SQLite */ 1325 static int sqlite3Fts5ExprInit(Fts5Global*, sqlite3*); 1326 1327 static int sqlite3Fts5ExprPhraseCount(Fts5Expr*); 1328 static int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); 1329 static int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); 1330 /* Fts5PoslistPopulator is opaque here: sqlite3Fts5ExprClearPoslists() returns a pointer to one and sqlite3Fts5ExprPopulatePoslists() takes one as an argument. */ 1331 typedef struct Fts5PoslistPopulator Fts5PoslistPopulator; 1332 static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr*, int); 1333 static int sqlite3Fts5ExprPopulatePoslists( 1334 Fts5Config*, Fts5Expr*, Fts5PoslistPopulator*, int, const char*, int 1335 ); 1336 static void sqlite3Fts5ExprCheckPoslists(Fts5Expr*, i64); 1337 1338 static int sqlite3Fts5ExprClonePhrase(Fts5Expr*, int, Fts5Expr**); 1339 1340 static int sqlite3Fts5ExprPhraseCollist(Fts5Expr *, int, const u8 **, int *); 1341 1342 /******************************************* 1343 ** The fts5_expr.c API above this point is used by the other hand-written 1344 ** C code in this module. The interfaces below this point are called by 1345 ** the parser code in fts5parse.y.
*/ 1346 /* zFmt is a format string; the variadic arguments that follow it supply its values. */ 1347 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...); 1348 1349 static Fts5ExprNode *sqlite3Fts5ParseNode( 1350 Fts5Parse *pParse, 1351 int eType, 1352 Fts5ExprNode *pLeft, 1353 Fts5ExprNode *pRight, 1354 Fts5ExprNearset *pNear 1355 ); 1356 1357 static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( 1358 Fts5Parse *pParse, 1359 Fts5ExprNode *pLeft, 1360 Fts5ExprNode *pRight 1361 ); 1362 1363 static Fts5ExprPhrase *sqlite3Fts5ParseTerm( 1364 Fts5Parse *pParse, 1365 Fts5ExprPhrase *pPhrase, 1366 Fts5Token *pToken, 1367 int bPrefix 1368 ); 1369 1370 static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase*); 1371 1372 static Fts5ExprNearset *sqlite3Fts5ParseNearset( 1373 Fts5Parse*, 1374 Fts5ExprNearset*, 1375 Fts5ExprPhrase* 1376 ); 1377 1378 static Fts5Colset *sqlite3Fts5ParseColset( 1379 Fts5Parse*, 1380 Fts5Colset*, 1381 Fts5Token * 1382 ); 1383 /* NOTE(review): presumably destructors for the phrase, nearset and node objects returned by the functions above - confirm in fts5_expr.c */ 1384 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase*); 1385 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset*); 1386 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode*); 1387 1388 static void sqlite3Fts5ParseSetDistance(Fts5Parse*, Fts5ExprNearset*, Fts5Token*); 1389 static void sqlite3Fts5ParseSetColset(Fts5Parse*, Fts5ExprNode*, Fts5Colset*); 1390 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse*, Fts5Colset*); 1391 static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p); 1392 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token*); 1393 1394 /* 1395 ** End of interface to code in fts5_expr.c. 1396 **************************************************************************/ 1397 1398 1399 1400 /************************************************************************** 1401 ** Interface to code in fts5_aux.c. 1402 */ 1403 1404 static int sqlite3Fts5AuxInit(fts5_api*); 1405 /* 1406 ** End of interface to code in fts5_aux.c.
1407 **************************************************************************/ 1408 1409 /************************************************************************** 1410 ** Interface to code in fts5_tokenizer.c. 1411 */ 1412 1413 static int sqlite3Fts5TokenizerInit(fts5_api*); 1414 static int sqlite3Fts5TokenizerPattern( 1415 int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), /* Tokenizer constructor callback (presumably the one pTok was created with - TODO confirm) */ 1416 Fts5Tokenizer *pTok /* Tokenizer instance */ 1417 ); 1418 /* 1419 ** End of interface to code in fts5_tokenizer.c. 1420 **************************************************************************/ 1421 1422 /************************************************************************** 1423 ** Interface to code in fts5_vocab.c. 1424 */ 1425 1426 static int sqlite3Fts5VocabInit(Fts5Global*, sqlite3*); 1427 1428 /* 1429 ** End of interface to code in fts5_vocab.c. 1430 **************************************************************************/ 1431 1432 1433 /************************************************************************** 1434 ** Interface to automatically generated code in fts5_unicode2.c. 1435 */ 1436 static int sqlite3Fts5UnicodeIsdiacritic(int c); 1437 static int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic); 1438 1439 static int sqlite3Fts5UnicodeCatParse(const char*, u8*); 1440 static int sqlite3Fts5UnicodeCategory(u32 iCode); 1441 static void sqlite3Fts5UnicodeAscii(u8*, u8*); 1442 /* 1443 ** End of interface to code in fts5_unicode2.c.
1444 **************************************************************************/ 1445 1446 #endif 1447 1448 #line 1 "fts5parse.h" 1449 #define FTS5_OR 1 1450 #define FTS5_AND 2 1451 #define FTS5_NOT 3 1452 #define FTS5_TERM 4 1453 #define FTS5_COLON 5 1454 #define FTS5_MINUS 6 1455 #define FTS5_LCP 7 1456 #define FTS5_RCP 8 1457 #define FTS5_STRING 9 1458 #define FTS5_LP 10 1459 #define FTS5_RP 11 1460 #define FTS5_CARET 12 1461 #define FTS5_COMMA 13 1462 #define FTS5_PLUS 14 1463 #define FTS5_STAR 15 1464 1465 #line 1 "fts5parse.c" 1466 /* This file is automatically generated by Lemon from input grammar 1467 ** source file "fts5parse.y". */ 1468 /* 1469 ** 2000-05-29 1470 ** 1471 ** The author disclaims copyright to this source code. In place of 1472 ** a legal notice, here is a blessing: 1473 ** 1474 ** May you do good and not evil. 1475 ** May you find forgiveness for yourself and forgive others. 1476 ** May you share freely, never taking more than you give. 1477 ** 1478 ************************************************************************* 1479 ** Driver template for the LEMON parser generator. 1480 ** 1481 ** The "lemon" program processes an LALR(1) input grammar file, then uses 1482 ** this template to construct a parser. The "lemon" program inserts text 1483 ** at each "%%" line. Also, any "P-a-r-s-e" identifier prefix (without the 1484 ** interstitial "-" characters) contained in this template is changed into 1485 ** the value of the %name directive from the grammar. Otherwise, the content 1486 ** of this template is copied straight through into the generated parser 1487 ** source file.
1488 ** 1489 ** The following is the concatenation of all %include directives from the 1490 ** input grammar file: 1491 */ 1492 /************ Begin %include sections from the grammar ************************/ 1493 #line 47 "fts5parse.y" 1494 1495 /* #include "fts5Int.h" */ 1496 /* #include "fts5parse.h" */ 1497 1498 /* 1499 ** Disable all error recovery processing in the parser push-down 1500 ** automaton. 1501 */ 1502 #define fts5YYNOERRORRECOVERY 1 1503 1504 /* 1505 ** Make fts5yytestcase() the same as testcase() 1506 */ 1507 #define fts5yytestcase(X) testcase(X) 1508 1509 /* 1510 ** Indicate that sqlite3ParserFree() will never be called with a null 1511 ** pointer. 1512 */ 1513 #define fts5YYPARSEFREENOTNULL 1 1514 1515 /* 1516 ** Alternative datatype for the argument to the malloc() routine passed 1517 ** into sqlite3ParserAlloc(). The default is size_t. 1518 */ 1519 #define fts5YYMALLOCARGTYPE u64 1520 1521 #line 57 "fts5parse.sql" 1522 /**************** End of %include directives **********************************/ 1523 /* These constants specify the various numeric values for terminal symbols. 1524 ***************** Begin token definitions *************************************/ /* The definitions below are skipped when FTS5_OR is already defined, as it is by the identical list under "fts5parse.h" earlier in this file. */ 1525 #ifndef FTS5_OR 1526 #define FTS5_OR 1 1527 #define FTS5_AND 2 1528 #define FTS5_NOT 3 1529 #define FTS5_TERM 4 1530 #define FTS5_COLON 5 1531 #define FTS5_MINUS 6 1532 #define FTS5_LCP 7 1533 #define FTS5_RCP 8 1534 #define FTS5_STRING 9 1535 #define FTS5_LP 10 1536 #define FTS5_RP 11 1537 #define FTS5_CARET 12 1538 #define FTS5_COMMA 13 1539 #define FTS5_PLUS 14 1540 #define FTS5_STAR 15 1541 #endif 1542 /**************** End token definitions ***************************************/ 1543 1544 /* The next section is a series of control #defines that configure 1545 ** various aspects of the generated parser. 1546 ** fts5YYCODETYPE is the data type used to store the integer codes 1547 ** that represent terminal and non-terminal symbols.
1548 ** "unsigned char" is used if there are fewer than 1549 ** 256 symbols. Larger types otherwise. 1550 ** fts5YYNOCODE is a number of type fts5YYCODETYPE that is not used for 1551 ** any terminal or nonterminal symbol. 1552 ** fts5YYFALLBACK If defined, this indicates that one or more tokens 1553 ** (also known as: "terminal symbols") have fall-back 1554 ** values which should be used if the original symbol 1555 ** would not parse. This permits keywords to sometimes 1556 ** be used as identifiers, for example. 1557 ** fts5YYACTIONTYPE is the data type used for "action codes" - numbers 1558 ** that indicate what to do in response to the next 1559 ** token. 1560 ** sqlite3Fts5ParserFTS5TOKENTYPE is the data type used for the minor type of terminal 1561 ** symbols. Background: A "minor type" is a semantic 1562 ** value associated with a terminal or non-terminal 1563 ** symbol. For example, for an "ID" terminal symbol, 1564 ** the minor type might be the name of the identifier. 1565 ** Each non-terminal can have a different minor type. 1566 ** Terminal symbols all have the same minor type, though. 1567 ** This macro defines the minor type for terminal 1568 ** symbols. 1569 ** fts5YYMINORTYPE is the data type used for all minor types. 1570 ** This is typically a union of many types, one of 1571 ** which is sqlite3Fts5ParserFTS5TOKENTYPE. The entry in the union 1572 ** for terminal symbols is called "fts5yy0". 1573 ** fts5YYSTACKDEPTH is the maximum depth of the parser's stack.
If 1574 ** zero the stack is dynamically sized using realloc() 1575 ** sqlite3Fts5ParserARG_SDECL A static variable declaration for the %extra_argument 1576 ** sqlite3Fts5ParserARG_PDECL A parameter declaration for the %extra_argument 1577 ** sqlite3Fts5ParserARG_PARAM Code to pass %extra_argument as a subroutine parameter 1578 ** sqlite3Fts5ParserARG_STORE Code to store %extra_argument into fts5yypParser 1579 ** sqlite3Fts5ParserARG_FETCH Code to extract %extra_argument from fts5yypParser 1580 ** sqlite3Fts5ParserCTX_* As sqlite3Fts5ParserARG_ except for %extra_context 1581 ** fts5YYERRORSYMBOL is the code number of the error symbol. If not 1582 ** defined, then do no error processing. 1583 ** fts5YYNSTATE the combined number of states. 1584 ** fts5YYNRULE the number of rules in the grammar 1585 ** fts5YYNFTS5TOKEN Number of terminal symbols 1586 ** fts5YY_MAX_SHIFT Maximum value for shift actions 1587 ** fts5YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions 1588 ** fts5YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions 1589 ** fts5YY_ERROR_ACTION The fts5yy_action[] code for syntax error 1590 ** fts5YY_ACCEPT_ACTION The fts5yy_action[] code for accept 1591 ** fts5YY_NO_ACTION The fts5yy_action[] code for no-op 1592 ** fts5YY_MIN_REDUCE Minimum value for reduce actions 1593 ** fts5YY_MAX_REDUCE Maximum value for reduce actions 1594 */ 1595 #ifndef INTERFACE 1596 # define INTERFACE 1 1597 #endif 1598 /************* Begin control #defines *****************************************/ 1599 #define fts5YYCODETYPE unsigned char 1600 #define fts5YYNOCODE 27 1601 #define fts5YYACTIONTYPE unsigned char 1602 #define sqlite3Fts5ParserFTS5TOKENTYPE Fts5Token 1603 typedef union { 1604 int fts5yyinit; 1605 sqlite3Fts5ParserFTS5TOKENTYPE fts5yy0; 1606 int fts5yy4; 1607 Fts5Colset* fts5yy11; 1608 Fts5ExprNode* fts5yy24; 1609 Fts5ExprNearset* fts5yy46; 1610 Fts5ExprPhrase* fts5yy53; 1611 } fts5YYMINORTYPE; 1612 #ifndef fts5YYSTACKDEPTH 1613 #define fts5YYSTACKDEPTH
100 1614 #endif 1615 #define sqlite3Fts5ParserARG_SDECL Fts5Parse *pParse; 1616 #define sqlite3Fts5ParserARG_PDECL ,Fts5Parse *pParse 1617 #define sqlite3Fts5ParserARG_PARAM ,pParse 1618 #define sqlite3Fts5ParserARG_FETCH Fts5Parse *pParse=fts5yypParser->pParse; 1619 #define sqlite3Fts5ParserARG_STORE fts5yypParser->pParse=pParse; 1620 #define sqlite3Fts5ParserCTX_SDECL 1621 #define sqlite3Fts5ParserCTX_PDECL 1622 #define sqlite3Fts5ParserCTX_PARAM 1623 #define sqlite3Fts5ParserCTX_FETCH 1624 #define sqlite3Fts5ParserCTX_STORE 1625 #define fts5YYNSTATE 35 1626 #define fts5YYNRULE 28 1627 #define fts5YYNRULE_WITH_ACTION 28 1628 #define fts5YYNFTS5TOKEN 16 1629 #define fts5YY_MAX_SHIFT 34 1630 #define fts5YY_MIN_SHIFTREDUCE 52 1631 #define fts5YY_MAX_SHIFTREDUCE 79 1632 #define fts5YY_ERROR_ACTION 80 1633 #define fts5YY_ACCEPT_ACTION 81 1634 #define fts5YY_NO_ACTION 82 1635 #define fts5YY_MIN_REDUCE 83 1636 #define fts5YY_MAX_REDUCE 110 1637 /************* End control #defines *******************************************/ /* fts5YY_NLOOKAHEAD: number of elements in the fts5yy_lookahead[] array */ 1638 #define fts5YY_NLOOKAHEAD ((int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0]))) 1639 1640 /* Define the fts5yytestcase() macro to be a no-op if it is not already defined 1641 ** otherwise. 1642 ** 1643 ** Applications can choose to define fts5yytestcase() in the %include section 1644 ** to a macro that can assist in verifying code coverage. For production 1645 ** code the fts5yytestcase() macro should be turned off. But it is useful 1646 ** for testing. 1647 */ 1648 #ifndef fts5yytestcase 1649 # define fts5yytestcase(X) 1650 #endif 1651 1652 1653 /* Next are the tables used to determine what action to take based on the 1654 ** current state and lookahead token. These tables are used to implement 1655 ** functions that take a state number and lookahead value and return an 1656 ** action integer. 1657 ** 1658 ** Suppose the action integer is N.
Then the action is determined as 1659 ** follows 1660 ** 1661 ** 0 <= N <= fts5YY_MAX_SHIFT Shift N. That is, push the lookahead 1662 ** token onto the stack and goto state N. 1663 ** 1664 ** N between fts5YY_MIN_SHIFTREDUCE Shift to an arbitrary state then 1665 ** and fts5YY_MAX_SHIFTREDUCE reduce by rule N-fts5YY_MIN_SHIFTREDUCE. 1666 ** 1667 ** N == fts5YY_ERROR_ACTION A syntax error has occurred. 1668 ** 1669 ** N == fts5YY_ACCEPT_ACTION The parser accepts its input. 1670 ** 1671 ** N == fts5YY_NO_ACTION No such action. Denotes unused 1672 ** slots in the fts5yy_action[] table. 1673 ** 1674 ** N between fts5YY_MIN_REDUCE Reduce by rule N-fts5YY_MIN_REDUCE 1675 ** and fts5YY_MAX_REDUCE 1676 ** 1677 ** The action table is constructed as a single large table named fts5yy_action[]. 1678 ** Given state S and lookahead X, the action is computed as either: 1679 ** 1680 ** (A) N = fts5yy_action[ fts5yy_shift_ofst[S] + X ] 1681 ** (B) N = fts5yy_default[S] 1682 ** 1683 ** The (A) formula is preferred. The B formula is used instead if 1684 ** fts5yy_lookahead[fts5yy_shift_ofst[S]+X] is not equal to X. 1685 ** 1686 ** The formulas above are for computing the action when the lookahead is 1687 ** a terminal symbol. If the lookahead is a non-terminal (as occurs after 1688 ** a reduce action) then the fts5yy_reduce_ofst[] array is used in place of 1689 ** the fts5yy_shift_ofst[] array. 1690 ** 1691 ** The following are the tables generated in this section: 1692 ** 1693 ** fts5yy_action[] A single table containing all actions. 1694 ** fts5yy_lookahead[] A table containing the lookahead for each entry in 1695 ** fts5yy_action. Used to detect hash collisions. 1696 ** fts5yy_shift_ofst[] For each state, the offset into fts5yy_action for 1697 ** shifting terminals. 1698 ** fts5yy_reduce_ofst[] For each state, the offset into fts5yy_action for 1699 ** shifting non-terminals after a reduce. 1700 ** fts5yy_default[] Default action for each state. 
1701 ** 1702 *********** Begin parsing tables **********************************************/ 1703 #define fts5YY_ACTTAB_COUNT (105) 1704 static const fts5YYACTIONTYPE fts5yy_action[] = { 1705 /* 0 */ 81, 20, 96, 6, 28, 99, 98, 26, 26, 18, 1706 /* 10 */ 96, 6, 28, 17, 98, 56, 26, 19, 96, 6, 1707 /* 20 */ 28, 14, 98, 14, 26, 31, 92, 96, 6, 28, 1708 /* 30 */ 108, 98, 25, 26, 21, 96, 6, 28, 78, 98, 1709 /* 40 */ 58, 26, 29, 96, 6, 28, 107, 98, 22, 26, 1710 /* 50 */ 24, 16, 12, 11, 1, 13, 13, 24, 16, 23, 1711 /* 60 */ 11, 33, 34, 13, 97, 8, 27, 32, 98, 7, 1712 /* 70 */ 26, 3, 4, 5, 3, 4, 5, 3, 83, 4, 1713 /* 80 */ 5, 3, 63, 5, 3, 62, 12, 2, 86, 13, 1714 /* 90 */ 9, 30, 10, 10, 54, 57, 75, 78, 78, 53, 1715 /* 100 */ 57, 15, 82, 82, 71, 1716 }; 1717 static const fts5YYCODETYPE fts5yy_lookahead[] = { 1718 /* 0 */ 16, 17, 18, 19, 20, 22, 22, 24, 24, 17, 1719 /* 10 */ 18, 19, 20, 7, 22, 9, 24, 17, 18, 19, 1720 /* 20 */ 20, 9, 22, 9, 24, 13, 17, 18, 19, 20, 1721 /* 30 */ 26, 22, 24, 24, 17, 18, 19, 20, 15, 22, 1722 /* 40 */ 9, 24, 17, 18, 19, 20, 26, 22, 21, 24, 1723 /* 50 */ 6, 7, 9, 9, 10, 12, 12, 6, 7, 21, 1724 /* 60 */ 9, 24, 25, 12, 18, 5, 20, 14, 22, 5, 1725 /* 70 */ 24, 3, 1, 2, 3, 1, 2, 3, 0, 1, 1726 /* 80 */ 2, 3, 11, 2, 3, 11, 9, 10, 5, 12, 1727 /* 90 */ 23, 24, 10, 10, 8, 9, 9, 15, 15, 8, 1728 /* 100 */ 9, 9, 27, 27, 11, 27, 27, 27, 27, 27, 1729 /* 110 */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 1730 /* 120 */ 27, 1731 }; 1732 #define fts5YY_SHIFT_COUNT (34) 1733 #define fts5YY_SHIFT_MIN (0) 1734 #define fts5YY_SHIFT_MAX (93) 1735 static const unsigned char fts5yy_shift_ofst[] = { 1736 /* 0 */ 44, 44, 44, 44, 44, 44, 51, 77, 43, 12, 1737 /* 10 */ 14, 83, 82, 14, 23, 23, 31, 31, 71, 74, 1738 /* 20 */ 78, 81, 86, 91, 6, 53, 53, 60, 64, 68, 1739 /* 30 */ 53, 87, 92, 53, 93, 1740 }; 1741 #define fts5YY_REDUCE_COUNT (17) 1742 #define fts5YY_REDUCE_MIN (-17) 1743 #define fts5YY_REDUCE_MAX (67) 1744 static const signed char fts5yy_reduce_ofst[] = { 1745 /* 0 */ -16, -8, 
0, 9, 17, 25, 46, -17, -17, 37, 1746 /* 10 */ 67, 4, 4, 8, 4, 20, 27, 38, 1747 }; 1748 static const fts5YYACTIONTYPE fts5yy_default[] = { 1749 /* 0 */ 80, 80, 80, 80, 80, 80, 95, 80, 80, 105, 1750 /* 10 */ 80, 110, 110, 80, 110, 110, 80, 80, 80, 80, 1751 /* 20 */ 80, 91, 80, 80, 80, 101, 100, 80, 80, 90, 1752 /* 30 */ 103, 80, 80, 104, 80, 1753 }; 1754 /********** End of lemon-generated parsing tables *****************************/ 1755 1756 /* The next table maps tokens (terminal symbols) into fallback tokens. 1757 ** If a construct like the following: 1758 ** 1759 ** %fallback ID X Y Z. 1760 ** 1761 ** appears in the grammar, then ID becomes a fallback token for X, Y, 1762 ** and Z. Whenever one of the tokens X, Y, or Z is input to the parser 1763 ** but it does not parse, the type of the token is changed to ID and 1764 ** the parse is retried before an error is thrown. 1765 ** 1766 ** This feature can be used, for example, to cause some keywords in a language 1767 ** to revert to identifiers if they keyword does not apply in the context where 1768 ** it appears. 1769 */ 1770 #ifdef fts5YYFALLBACK 1771 static const fts5YYCODETYPE fts5yyFallback[] = { 1772 }; 1773 #endif /* fts5YYFALLBACK */ 1774 1775 /* The following structure represents a single element of the 1776 ** parser's stack. Information stored includes: 1777 ** 1778 ** + The state number for the parser at this level of the stack. 1779 ** 1780 ** + The value of the token stored at this level of the stack. 1781 ** (In other words, the "major" token.) 1782 ** 1783 ** + The semantic value stored at this level of the stack. This is 1784 ** the information used by the action routines in the grammar. 1785 ** It is sometimes called the "minor" token. 1786 ** 1787 ** After the "shift" half of a SHIFTREDUCE action, the stateno field 1788 ** actually contains the reduce action for the second half of the 1789 ** SHIFTREDUCE. 
1790 */ 1791 struct fts5yyStackEntry { 1792 fts5YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ 1793 fts5YYCODETYPE major; /* The major token value. This is the code 1794 ** number for the token at this stack level */ 1795 fts5YYMINORTYPE minor; /* The user-supplied minor token value. This 1796 ** is the value of the token */ 1797 }; 1798 typedef struct fts5yyStackEntry fts5yyStackEntry; 1799 1800 /* The state of the parser is completely contained in an instance of 1801 ** the following structure */ 1802 struct fts5yyParser { 1803 fts5yyStackEntry *fts5yytos; /* Pointer to top element of the stack */ 1804 #ifdef fts5YYTRACKMAXSTACKDEPTH 1805 int fts5yyhwm; /* High-water mark of the stack */ 1806 #endif 1807 #ifndef fts5YYNOERRORRECOVERY 1808 int fts5yyerrcnt; /* Shifts left before out of the error */ 1809 #endif 1810 sqlite3Fts5ParserARG_SDECL /* A place to hold %extra_argument */ 1811 sqlite3Fts5ParserCTX_SDECL /* A place to hold %extra_context */ 1812 #if fts5YYSTACKDEPTH<=0 1813 int fts5yystksz; /* Current side of the stack */ 1814 fts5yyStackEntry *fts5yystack; /* The parser's stack */ 1815 fts5yyStackEntry fts5yystk0; /* First stack entry */ 1816 #else 1817 fts5yyStackEntry fts5yystack[fts5YYSTACKDEPTH]; /* The parser's stack */ 1818 fts5yyStackEntry *fts5yystackEnd; /* Last entry in the stack */ 1819 #endif 1820 }; 1821 typedef struct fts5yyParser fts5yyParser; 1822 1823 #include <assert.h> 1824 #ifndef NDEBUG 1825 #include <stdio.h> 1826 static FILE *fts5yyTraceFILE = 0; 1827 static char *fts5yyTracePrompt = 0; 1828 #endif /* NDEBUG */ 1829 1830 #ifndef NDEBUG 1831 /* 1832 ** Turn parser tracing on by giving a stream to which to write the trace 1833 ** and a prompt to preface each trace message. Tracing is turned off 1834 ** by making either argument NULL 1835 ** 1836 ** Inputs: 1837 ** <ul> 1838 ** <li> A FILE* to which trace output should be written. 1839 ** If NULL, then tracing is turned off. 
1840 ** <li> A prefix string written at the beginning of every 1841 ** line of trace output. If NULL, then tracing is 1842 ** turned off. 1843 ** </ul> 1844 ** 1845 ** Outputs: 1846 ** None. 1847 */ 1848 static void sqlite3Fts5ParserTrace(FILE *TraceFILE, char *zTracePrompt){ 1849 fts5yyTraceFILE = TraceFILE; 1850 fts5yyTracePrompt = zTracePrompt; 1851 if( fts5yyTraceFILE==0 ) fts5yyTracePrompt = 0; 1852 else if( fts5yyTracePrompt==0 ) fts5yyTraceFILE = 0; 1853 } 1854 #endif /* NDEBUG */ 1855 1856 #if defined(fts5YYCOVERAGE) || !defined(NDEBUG) 1857 /* For tracing shifts, the names of all terminals and nonterminals 1858 ** are required. The following table supplies these names */ 1859 static const char *const fts5yyTokenName[] = { 1860 /* 0 */ "$", 1861 /* 1 */ "OR", 1862 /* 2 */ "AND", 1863 /* 3 */ "NOT", 1864 /* 4 */ "TERM", 1865 /* 5 */ "COLON", 1866 /* 6 */ "MINUS", 1867 /* 7 */ "LCP", 1868 /* 8 */ "RCP", 1869 /* 9 */ "STRING", 1870 /* 10 */ "LP", 1871 /* 11 */ "RP", 1872 /* 12 */ "CARET", 1873 /* 13 */ "COMMA", 1874 /* 14 */ "PLUS", 1875 /* 15 */ "STAR", 1876 /* 16 */ "input", 1877 /* 17 */ "expr", 1878 /* 18 */ "cnearset", 1879 /* 19 */ "exprlist", 1880 /* 20 */ "colset", 1881 /* 21 */ "colsetlist", 1882 /* 22 */ "nearset", 1883 /* 23 */ "nearphrases", 1884 /* 24 */ "phrase", 1885 /* 25 */ "neardist_opt", 1886 /* 26 */ "star_opt", 1887 }; 1888 #endif /* defined(fts5YYCOVERAGE) || !defined(NDEBUG) */ 1889 1890 #ifndef NDEBUG 1891 /* For tracing reduce actions, the names of all rules are required. 
1892 */ 1893 static const char *const fts5yyRuleName[] = { 1894 /* 0 */ "input ::= expr", 1895 /* 1 */ "colset ::= MINUS LCP colsetlist RCP", 1896 /* 2 */ "colset ::= LCP colsetlist RCP", 1897 /* 3 */ "colset ::= STRING", 1898 /* 4 */ "colset ::= MINUS STRING", 1899 /* 5 */ "colsetlist ::= colsetlist STRING", 1900 /* 6 */ "colsetlist ::= STRING", 1901 /* 7 */ "expr ::= expr AND expr", 1902 /* 8 */ "expr ::= expr OR expr", 1903 /* 9 */ "expr ::= expr NOT expr", 1904 /* 10 */ "expr ::= colset COLON LP expr RP", 1905 /* 11 */ "expr ::= LP expr RP", 1906 /* 12 */ "expr ::= exprlist", 1907 /* 13 */ "exprlist ::= cnearset", 1908 /* 14 */ "exprlist ::= exprlist cnearset", 1909 /* 15 */ "cnearset ::= nearset", 1910 /* 16 */ "cnearset ::= colset COLON nearset", 1911 /* 17 */ "nearset ::= phrase", 1912 /* 18 */ "nearset ::= CARET phrase", 1913 /* 19 */ "nearset ::= STRING LP nearphrases neardist_opt RP", 1914 /* 20 */ "nearphrases ::= phrase", 1915 /* 21 */ "nearphrases ::= nearphrases phrase", 1916 /* 22 */ "neardist_opt ::=", 1917 /* 23 */ "neardist_opt ::= COMMA STRING", 1918 /* 24 */ "phrase ::= phrase PLUS STRING star_opt", 1919 /* 25 */ "phrase ::= STRING star_opt", 1920 /* 26 */ "star_opt ::= STAR", 1921 /* 27 */ "star_opt ::=", 1922 }; 1923 #endif /* NDEBUG */ 1924 1925 1926 #if fts5YYSTACKDEPTH<=0 1927 /* 1928 ** Try to increase the size of the parser stack. Return the number 1929 ** of errors. Return 0 on success. 1930 */ 1931 static int fts5yyGrowStack(fts5yyParser *p){ 1932 int newSize; 1933 int idx; 1934 fts5yyStackEntry *pNew; 1935 1936 newSize = p->fts5yystksz*2 + 100; 1937 idx = p->fts5yytos ? 
(int)(p->fts5yytos - p->fts5yystack) : 0; 1938 if( p->fts5yystack==&p->fts5yystk0 ){ 1939 pNew = malloc(newSize*sizeof(pNew[0])); 1940 if( pNew ) pNew[0] = p->fts5yystk0; 1941 }else{ 1942 pNew = realloc(p->fts5yystack, newSize*sizeof(pNew[0])); 1943 } 1944 if( pNew ){ 1945 p->fts5yystack = pNew; 1946 p->fts5yytos = &p->fts5yystack[idx]; 1947 #ifndef NDEBUG 1948 if( fts5yyTraceFILE ){ 1949 fprintf(fts5yyTraceFILE,"%sStack grows from %d to %d entries.\n", 1950 fts5yyTracePrompt, p->fts5yystksz, newSize); 1951 } 1952 #endif 1953 p->fts5yystksz = newSize; 1954 } 1955 return pNew==0; 1956 } 1957 #endif 1958 1959 /* Datatype of the argument to the memory allocated passed as the 1960 ** second argument to sqlite3Fts5ParserAlloc() below. This can be changed by 1961 ** putting an appropriate #define in the %include section of the input 1962 ** grammar. 1963 */ 1964 #ifndef fts5YYMALLOCARGTYPE 1965 # define fts5YYMALLOCARGTYPE size_t 1966 #endif 1967 1968 /* Initialize a new parser that has already been allocated. 
1969 */ 1970 static void sqlite3Fts5ParserInit(void *fts5yypRawParser sqlite3Fts5ParserCTX_PDECL){ 1971 fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yypRawParser; 1972 sqlite3Fts5ParserCTX_STORE 1973 #ifdef fts5YYTRACKMAXSTACKDEPTH 1974 fts5yypParser->fts5yyhwm = 0; 1975 #endif 1976 #if fts5YYSTACKDEPTH<=0 1977 fts5yypParser->fts5yytos = NULL; 1978 fts5yypParser->fts5yystack = NULL; 1979 fts5yypParser->fts5yystksz = 0; 1980 if( fts5yyGrowStack(fts5yypParser) ){ 1981 fts5yypParser->fts5yystack = &fts5yypParser->fts5yystk0; 1982 fts5yypParser->fts5yystksz = 1; 1983 } 1984 #endif 1985 #ifndef fts5YYNOERRORRECOVERY 1986 fts5yypParser->fts5yyerrcnt = -1; 1987 #endif 1988 fts5yypParser->fts5yytos = fts5yypParser->fts5yystack; 1989 fts5yypParser->fts5yystack[0].stateno = 0; 1990 fts5yypParser->fts5yystack[0].major = 0; 1991 #if fts5YYSTACKDEPTH>0 1992 fts5yypParser->fts5yystackEnd = &fts5yypParser->fts5yystack[fts5YYSTACKDEPTH-1]; 1993 #endif 1994 } 1995 1996 #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK 1997 /* 1998 ** This function allocates a new parser. 1999 ** The only argument is a pointer to a function which works like 2000 ** malloc. 2001 ** 2002 ** Inputs: 2003 ** A pointer to the function used to allocate memory. 2004 ** 2005 ** Outputs: 2006 ** A pointer to a parser. This pointer is used in subsequent calls 2007 ** to sqlite3Fts5Parser and sqlite3Fts5ParserFree. 2008 */ 2009 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(fts5YYMALLOCARGTYPE) sqlite3Fts5ParserCTX_PDECL){ 2010 fts5yyParser *fts5yypParser; 2011 fts5yypParser = (fts5yyParser*)(*mallocProc)( (fts5YYMALLOCARGTYPE)sizeof(fts5yyParser) ); 2012 if( fts5yypParser ){ 2013 sqlite3Fts5ParserCTX_STORE 2014 sqlite3Fts5ParserInit(fts5yypParser sqlite3Fts5ParserCTX_PARAM); 2015 } 2016 return (void*)fts5yypParser; 2017 } 2018 #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ 2019 2020 2021 /* The following function deletes the "minor type" or semantic value 2022 ** associated with a symbol. 
The symbol can be either a terminal 2023 ** or nonterminal. "fts5yymajor" is the symbol code, and "fts5yypminor" is 2024 ** a pointer to the value to be deleted. The code used to do the 2025 ** deletions is derived from the %destructor and/or %token_destructor 2026 ** directives of the input grammar. 2027 */ 2028 static void fts5yy_destructor( 2029 fts5yyParser *fts5yypParser, /* The parser */ 2030 fts5YYCODETYPE fts5yymajor, /* Type code for object to destroy */ 2031 fts5YYMINORTYPE *fts5yypminor /* The object to be destroyed */ 2032 ){ 2033 sqlite3Fts5ParserARG_FETCH 2034 sqlite3Fts5ParserCTX_FETCH 2035 switch( fts5yymajor ){ 2036 /* Here is inserted the actions which take place when a 2037 ** terminal or non-terminal is destroyed. This can happen 2038 ** when the symbol is popped from the stack during a 2039 ** reduce or during error processing or when a parser is 2040 ** being destroyed before it is finished parsing. 2041 ** 2042 ** Note: during a reduce, the only symbols destroyed are those 2043 ** which appear on the RHS of the rule, but which are *not* used 2044 ** inside the C code. 
2045 */ 2046 /********* Begin destructor definitions ***************************************/ 2047 case 16: /* input */ 2048 { 2049 #line 83 "fts5parse.y" 2050 (void)pParse; 2051 #line 586 "fts5parse.sql" 2052 } 2053 break; 2054 case 17: /* expr */ 2055 case 18: /* cnearset */ 2056 case 19: /* exprlist */ 2057 { 2058 #line 89 "fts5parse.y" 2059 sqlite3Fts5ParseNodeFree((fts5yypminor->fts5yy24)); 2060 #line 595 "fts5parse.sql" 2061 } 2062 break; 2063 case 20: /* colset */ 2064 case 21: /* colsetlist */ 2065 { 2066 #line 93 "fts5parse.y" 2067 sqlite3_free((fts5yypminor->fts5yy11)); 2068 #line 603 "fts5parse.sql" 2069 } 2070 break; 2071 case 22: /* nearset */ 2072 case 23: /* nearphrases */ 2073 { 2074 #line 148 "fts5parse.y" 2075 sqlite3Fts5ParseNearsetFree((fts5yypminor->fts5yy46)); 2076 #line 611 "fts5parse.sql" 2077 } 2078 break; 2079 case 24: /* phrase */ 2080 { 2081 #line 183 "fts5parse.y" 2082 sqlite3Fts5ParsePhraseFree((fts5yypminor->fts5yy53)); 2083 #line 618 "fts5parse.sql" 2084 } 2085 break; 2086 /********* End destructor definitions *****************************************/ 2087 default: break; /* If no destructor action specified: do nothing */ 2088 } 2089 } 2090 2091 /* 2092 ** Pop the parser's stack once. 2093 ** 2094 ** If there is a destructor routine associated with the token which 2095 ** is popped from the stack, then call it. 
2096 */ 2097 static void fts5yy_pop_parser_stack(fts5yyParser *pParser){ 2098 fts5yyStackEntry *fts5yytos; 2099 assert( pParser->fts5yytos!=0 ); 2100 assert( pParser->fts5yytos > pParser->fts5yystack ); 2101 fts5yytos = pParser->fts5yytos--; 2102 #ifndef NDEBUG 2103 if( fts5yyTraceFILE ){ 2104 fprintf(fts5yyTraceFILE,"%sPopping %s\n", 2105 fts5yyTracePrompt, 2106 fts5yyTokenName[fts5yytos->major]); 2107 } 2108 #endif 2109 fts5yy_destructor(pParser, fts5yytos->major, &fts5yytos->minor); 2110 } 2111 2112 /* 2113 ** Clear all secondary memory allocations from the parser 2114 */ 2115 static void sqlite3Fts5ParserFinalize(void *p){ 2116 fts5yyParser *pParser = (fts5yyParser*)p; 2117 while( pParser->fts5yytos>pParser->fts5yystack ) fts5yy_pop_parser_stack(pParser); 2118 #if fts5YYSTACKDEPTH<=0 2119 if( pParser->fts5yystack!=&pParser->fts5yystk0 ) free(pParser->fts5yystack); 2120 #endif 2121 } 2122 2123 #ifndef sqlite3Fts5Parser_ENGINEALWAYSONSTACK 2124 /* 2125 ** Deallocate and destroy a parser. Destructors are called for 2126 ** all stack elements before shutting the parser down. 2127 ** 2128 ** If the fts5YYPARSEFREENEVERNULL macro exists (for example because it 2129 ** is defined in a %include section of the input grammar) then it is 2130 ** assumed that the input pointer is never NULL. 2131 */ 2132 static void sqlite3Fts5ParserFree( 2133 void *p, /* The parser to be deleted */ 2134 void (*freeProc)(void*) /* Function used to reclaim memory */ 2135 ){ 2136 #ifndef fts5YYPARSEFREENEVERNULL 2137 if( p==0 ) return; 2138 #endif 2139 sqlite3Fts5ParserFinalize(p); 2140 (*freeProc)(p); 2141 } 2142 #endif /* sqlite3Fts5Parser_ENGINEALWAYSONSTACK */ 2143 2144 /* 2145 ** Return the peak depth of the stack for a parser. 
/*
** Return the peak depth of the stack for a parser, i.e. the value of
** its fts5yyhwm member.  Only available when fts5YYTRACKMAXSTACKDEPTH
** is defined.
*/
#ifdef fts5YYTRACKMAXSTACKDEPTH
static int sqlite3Fts5ParserStackPeak(void *p){
  fts5yyParser *pParser = (fts5yyParser*)p;
  return pParser->fts5yyhwm;
}
#endif

/* This array of booleans keeps track of the parser statement
** coverage.  The element fts5yycoverage[X][Y] is set when the parser
** is in state X and has a lookahead token Y.  In a well-tested
** system, every element of this matrix should end up being set.
*/
#if defined(fts5YYCOVERAGE)
static unsigned char fts5yycoverage[fts5YYNSTATE][fts5YYNFTS5TOKEN];
#endif

/*
** Write into out a description of every state/lookahead combination that
**
**   (1)  has not been used by the parser, and
**   (2)  is not a syntax error.
**
** Return the number of missed state/lookahead combinations.
**
** If out is NULL nothing is written and only the count is returned.
** A line is written for every combination that has an entry in the
** lookahead table, marked "ok" or "missed".
*/
#if defined(fts5YYCOVERAGE)
static int sqlite3Fts5ParserCoverage(FILE *out){
  int stateno, iLookAhead, i;
  int nMissed = 0;
  for(stateno=0; stateno<fts5YYNSTATE; stateno++){
    i = fts5yy_shift_ofst[stateno];
    for(iLookAhead=0; iLookAhead<fts5YYNFTS5TOKEN; iLookAhead++){
      /* Skip combinations that have no entry of their own in the table. */
      if( fts5yy_lookahead[i+iLookAhead]!=iLookAhead ) continue;
      if( fts5yycoverage[stateno][iLookAhead]==0 ) nMissed++;
      if( out ){
        fprintf(out,"State %d lookahead %s %s\n", stateno,
                fts5yyTokenName[iLookAhead],
                fts5yycoverage[stateno][iLookAhead] ? "ok" : "missed");
      }
    }
  }
  return nMissed;
}
#endif

/*
** Find the appropriate action for a parser given the terminal
** look-ahead token iLookAhead.
*/
2194 */ 2195 static fts5YYACTIONTYPE fts5yy_find_shift_action( 2196 fts5YYCODETYPE iLookAhead, /* The look-ahead token */ 2197 fts5YYACTIONTYPE stateno /* Current state number */ 2198 ){ 2199 int i; 2200 2201 if( stateno>fts5YY_MAX_SHIFT ) return stateno; 2202 assert( stateno <= fts5YY_SHIFT_COUNT ); 2203 #if defined(fts5YYCOVERAGE) 2204 fts5yycoverage[stateno][iLookAhead] = 1; 2205 #endif 2206 do{ 2207 i = fts5yy_shift_ofst[stateno]; 2208 assert( i>=0 ); 2209 assert( i<=fts5YY_ACTTAB_COUNT ); 2210 assert( i+fts5YYNFTS5TOKEN<=(int)fts5YY_NLOOKAHEAD ); 2211 assert( iLookAhead!=fts5YYNOCODE ); 2212 assert( iLookAhead < fts5YYNFTS5TOKEN ); 2213 i += iLookAhead; 2214 assert( i<(int)fts5YY_NLOOKAHEAD ); 2215 if( fts5yy_lookahead[i]!=iLookAhead ){ 2216 #ifdef fts5YYFALLBACK 2217 fts5YYCODETYPE iFallback; /* Fallback token */ 2218 assert( iLookAhead<sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0]) ); 2219 iFallback = fts5yyFallback[iLookAhead]; 2220 if( iFallback!=0 ){ 2221 #ifndef NDEBUG 2222 if( fts5yyTraceFILE ){ 2223 fprintf(fts5yyTraceFILE, "%sFALLBACK %s => %s\n", 2224 fts5yyTracePrompt, fts5yyTokenName[iLookAhead], fts5yyTokenName[iFallback]); 2225 } 2226 #endif 2227 assert( fts5yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ 2228 iLookAhead = iFallback; 2229 continue; 2230 } 2231 #endif 2232 #ifdef fts5YYWILDCARD 2233 { 2234 int j = i - iLookAhead + fts5YYWILDCARD; 2235 assert( j<(int)(sizeof(fts5yy_lookahead)/sizeof(fts5yy_lookahead[0])) ); 2236 if( fts5yy_lookahead[j]==fts5YYWILDCARD && iLookAhead>0 ){ 2237 #ifndef NDEBUG 2238 if( fts5yyTraceFILE ){ 2239 fprintf(fts5yyTraceFILE, "%sWILDCARD %s => %s\n", 2240 fts5yyTracePrompt, fts5yyTokenName[iLookAhead], 2241 fts5yyTokenName[fts5YYWILDCARD]); 2242 } 2243 #endif /* NDEBUG */ 2244 return fts5yy_action[j]; 2245 } 2246 } 2247 #endif /* fts5YYWILDCARD */ 2248 return fts5yy_default[stateno]; 2249 }else{ 2250 assert( i>=0 && i<(int)(sizeof(fts5yy_action)/sizeof(fts5yy_action[0])) ); 2251 return 
fts5yy_action[i]; 2252 } 2253 }while(1); 2254 } 2255 2256 /* 2257 ** Find the appropriate action for a parser given the non-terminal 2258 ** look-ahead token iLookAhead. 2259 */ 2260 static fts5YYACTIONTYPE fts5yy_find_reduce_action( 2261 fts5YYACTIONTYPE stateno, /* Current state number */ 2262 fts5YYCODETYPE iLookAhead /* The look-ahead token */ 2263 ){ 2264 int i; 2265 #ifdef fts5YYERRORSYMBOL 2266 if( stateno>fts5YY_REDUCE_COUNT ){ 2267 return fts5yy_default[stateno]; 2268 } 2269 #else 2270 assert( stateno<=fts5YY_REDUCE_COUNT ); 2271 #endif 2272 i = fts5yy_reduce_ofst[stateno]; 2273 assert( iLookAhead!=fts5YYNOCODE ); 2274 i += iLookAhead; 2275 #ifdef fts5YYERRORSYMBOL 2276 if( i<0 || i>=fts5YY_ACTTAB_COUNT || fts5yy_lookahead[i]!=iLookAhead ){ 2277 return fts5yy_default[stateno]; 2278 } 2279 #else 2280 assert( i>=0 && i<fts5YY_ACTTAB_COUNT ); 2281 assert( fts5yy_lookahead[i]==iLookAhead ); 2282 #endif 2283 return fts5yy_action[i]; 2284 } 2285 2286 /* 2287 ** The following routine is called if the stack overflows. 
2288 */ 2289 static void fts5yyStackOverflow(fts5yyParser *fts5yypParser){ 2290 sqlite3Fts5ParserARG_FETCH 2291 sqlite3Fts5ParserCTX_FETCH 2292 #ifndef NDEBUG 2293 if( fts5yyTraceFILE ){ 2294 fprintf(fts5yyTraceFILE,"%sStack Overflow!\n",fts5yyTracePrompt); 2295 } 2296 #endif 2297 while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser); 2298 /* Here code is inserted which will execute if the parser 2299 ** stack every overflows */ 2300 /******** Begin %stack_overflow code ******************************************/ 2301 #line 36 "fts5parse.y" 2302 2303 sqlite3Fts5ParseError(pParse, "fts5: parser stack overflow"); 2304 #line 839 "fts5parse.sql" 2305 /******** End %stack_overflow code ********************************************/ 2306 sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument var */ 2307 sqlite3Fts5ParserCTX_STORE 2308 } 2309 2310 /* 2311 ** Print tracing information for a SHIFT action 2312 */ 2313 #ifndef NDEBUG 2314 static void fts5yyTraceShift(fts5yyParser *fts5yypParser, int fts5yyNewState, const char *zTag){ 2315 if( fts5yyTraceFILE ){ 2316 if( fts5yyNewState<fts5YYNSTATE ){ 2317 fprintf(fts5yyTraceFILE,"%s%s '%s', go to state %d\n", 2318 fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], 2319 fts5yyNewState); 2320 }else{ 2321 fprintf(fts5yyTraceFILE,"%s%s '%s', pending reduce %d\n", 2322 fts5yyTracePrompt, zTag, fts5yyTokenName[fts5yypParser->fts5yytos->major], 2323 fts5yyNewState - fts5YY_MIN_REDUCE); 2324 } 2325 } 2326 } 2327 #else 2328 # define fts5yyTraceShift(X,Y,Z) 2329 #endif 2330 2331 /* 2332 ** Perform a shift action. 
2333 */ 2334 static void fts5yy_shift( 2335 fts5yyParser *fts5yypParser, /* The parser to be shifted */ 2336 fts5YYACTIONTYPE fts5yyNewState, /* The new state to shift in */ 2337 fts5YYCODETYPE fts5yyMajor, /* The major token to shift in */ 2338 sqlite3Fts5ParserFTS5TOKENTYPE fts5yyMinor /* The minor token to shift in */ 2339 ){ 2340 fts5yyStackEntry *fts5yytos; 2341 fts5yypParser->fts5yytos++; 2342 #ifdef fts5YYTRACKMAXSTACKDEPTH 2343 if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){ 2344 fts5yypParser->fts5yyhwm++; 2345 assert( fts5yypParser->fts5yyhwm == (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack) ); 2346 } 2347 #endif 2348 #if fts5YYSTACKDEPTH>0 2349 if( fts5yypParser->fts5yytos>fts5yypParser->fts5yystackEnd ){ 2350 fts5yypParser->fts5yytos--; 2351 fts5yyStackOverflow(fts5yypParser); 2352 return; 2353 } 2354 #else 2355 if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz] ){ 2356 if( fts5yyGrowStack(fts5yypParser) ){ 2357 fts5yypParser->fts5yytos--; 2358 fts5yyStackOverflow(fts5yypParser); 2359 return; 2360 } 2361 } 2362 #endif 2363 if( fts5yyNewState > fts5YY_MAX_SHIFT ){ 2364 fts5yyNewState += fts5YY_MIN_REDUCE - fts5YY_MIN_SHIFTREDUCE; 2365 } 2366 fts5yytos = fts5yypParser->fts5yytos; 2367 fts5yytos->stateno = fts5yyNewState; 2368 fts5yytos->major = fts5yyMajor; 2369 fts5yytos->minor.fts5yy0 = fts5yyMinor; 2370 fts5yyTraceShift(fts5yypParser, fts5yyNewState, "Shift"); 2371 } 2372 2373 /* For rule J, fts5yyRuleInfoLhs[J] contains the symbol on the left-hand side 2374 ** of that rule */ 2375 static const fts5YYCODETYPE fts5yyRuleInfoLhs[] = { 2376 16, /* (0) input ::= expr */ 2377 20, /* (1) colset ::= MINUS LCP colsetlist RCP */ 2378 20, /* (2) colset ::= LCP colsetlist RCP */ 2379 20, /* (3) colset ::= STRING */ 2380 20, /* (4) colset ::= MINUS STRING */ 2381 21, /* (5) colsetlist ::= colsetlist STRING */ 2382 21, /* (6) colsetlist ::= STRING */ 2383 17, /* (7) expr 
::= expr AND expr */ 2384 17, /* (8) expr ::= expr OR expr */ 2385 17, /* (9) expr ::= expr NOT expr */ 2386 17, /* (10) expr ::= colset COLON LP expr RP */ 2387 17, /* (11) expr ::= LP expr RP */ 2388 17, /* (12) expr ::= exprlist */ 2389 19, /* (13) exprlist ::= cnearset */ 2390 19, /* (14) exprlist ::= exprlist cnearset */ 2391 18, /* (15) cnearset ::= nearset */ 2392 18, /* (16) cnearset ::= colset COLON nearset */ 2393 22, /* (17) nearset ::= phrase */ 2394 22, /* (18) nearset ::= CARET phrase */ 2395 22, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */ 2396 23, /* (20) nearphrases ::= phrase */ 2397 23, /* (21) nearphrases ::= nearphrases phrase */ 2398 25, /* (22) neardist_opt ::= */ 2399 25, /* (23) neardist_opt ::= COMMA STRING */ 2400 24, /* (24) phrase ::= phrase PLUS STRING star_opt */ 2401 24, /* (25) phrase ::= STRING star_opt */ 2402 26, /* (26) star_opt ::= STAR */ 2403 26, /* (27) star_opt ::= */ 2404 }; 2405 2406 /* For rule J, fts5yyRuleInfoNRhs[J] contains the negative of the number 2407 ** of symbols on the right-hand side of that rule. 
*/ 2408 static const signed char fts5yyRuleInfoNRhs[] = { 2409 -1, /* (0) input ::= expr */ 2410 -4, /* (1) colset ::= MINUS LCP colsetlist RCP */ 2411 -3, /* (2) colset ::= LCP colsetlist RCP */ 2412 -1, /* (3) colset ::= STRING */ 2413 -2, /* (4) colset ::= MINUS STRING */ 2414 -2, /* (5) colsetlist ::= colsetlist STRING */ 2415 -1, /* (6) colsetlist ::= STRING */ 2416 -3, /* (7) expr ::= expr AND expr */ 2417 -3, /* (8) expr ::= expr OR expr */ 2418 -3, /* (9) expr ::= expr NOT expr */ 2419 -5, /* (10) expr ::= colset COLON LP expr RP */ 2420 -3, /* (11) expr ::= LP expr RP */ 2421 -1, /* (12) expr ::= exprlist */ 2422 -1, /* (13) exprlist ::= cnearset */ 2423 -2, /* (14) exprlist ::= exprlist cnearset */ 2424 -1, /* (15) cnearset ::= nearset */ 2425 -3, /* (16) cnearset ::= colset COLON nearset */ 2426 -1, /* (17) nearset ::= phrase */ 2427 -2, /* (18) nearset ::= CARET phrase */ 2428 -5, /* (19) nearset ::= STRING LP nearphrases neardist_opt RP */ 2429 -1, /* (20) nearphrases ::= phrase */ 2430 -2, /* (21) nearphrases ::= nearphrases phrase */ 2431 0, /* (22) neardist_opt ::= */ 2432 -2, /* (23) neardist_opt ::= COMMA STRING */ 2433 -4, /* (24) phrase ::= phrase PLUS STRING star_opt */ 2434 -2, /* (25) phrase ::= STRING star_opt */ 2435 -1, /* (26) star_opt ::= STAR */ 2436 0, /* (27) star_opt ::= */ 2437 }; 2438 2439 static void fts5yy_accept(fts5yyParser*); /* Forward Declaration */ 2440 2441 /* 2442 ** Perform a reduce action and the shift that must immediately 2443 ** follow the reduce. 2444 ** 2445 ** The fts5yyLookahead and fts5yyLookaheadToken parameters provide reduce actions 2446 ** access to the lookahead token (if any). The fts5yyLookahead will be fts5YYNOCODE 2447 ** if the lookahead token has already been consumed. As this procedure is 2448 ** only called from one place, optimizing compilers will in-line it, which 2449 ** means that the extra parameters have no performance impact. 
*/
static fts5YYACTIONTYPE fts5yy_reduce(
  fts5yyParser *fts5yypParser,         /* The parser */
  unsigned int fts5yyruleno,       /* Number of the rule by which to reduce */
  int fts5yyLookahead,             /* Lookahead token, or fts5YYNOCODE if none */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyLookaheadToken  /* Value of the lookahead token */
  sqlite3Fts5ParserCTX_PDECL                   /* %extra_context */
){
  int fts5yygoto;                     /* The next state */
  fts5YYACTIONTYPE fts5yyact;             /* The next action */
  fts5yyStackEntry *fts5yymsp;            /* The top of the parser's stack */
  int fts5yysize;                     /* Amount to pop the stack */
  /* NOTE(review): pParse, used by the actions below, is not declared in this
  ** function; presumably the ARG_FETCH macro introduces it - confirm against
  ** the macro definition. */
  sqlite3Fts5ParserARG_FETCH
  /* None of the reduce actions below reads the lookahead; the casts only
  ** silence unused-parameter warnings. */
  (void)fts5yyLookahead;
  (void)fts5yyLookaheadToken;
  fts5yymsp = fts5yypParser->fts5yytos;

  switch( fts5yyruleno ){
  /* Beginning here are the reduction cases. A typical example
  ** follows:
  **   case 0:
  **  #line <lineno> <grammarfile>
  **     { ... }           // User supplied code
  **  #line <lineno> <thisfile>
  **     break;
  */
/********** Begin reduce actions **********************************************/
        /* Scratch slot: actions that build the left-hand-side value here copy
        ** it into the stack entry only after the user code has run, so the
        ** right-hand-side values sharing that entry are still readable. */
        fts5YYMINORTYPE fts5yylhsminor;
      case 0: /* input ::= expr */
#line 82 "fts5parse.y"
{ sqlite3Fts5ParseFinished(pParse, fts5yymsp[0].minor.fts5yy24); }
#line 1016 "fts5parse.sql"
        break;
      case 1: /* colset ::= MINUS LCP colsetlist RCP */
#line 97 "fts5parse.y"
{
    fts5yymsp[-3].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
}
#line 1023 "fts5parse.sql"
        break;
      case 2: /* colset ::= LCP colsetlist RCP */
#line 100 "fts5parse.y"
{ fts5yymsp[-2].minor.fts5yy11 = fts5yymsp[-1].minor.fts5yy11; }
#line 1028 "fts5parse.sql"
        break;
      case 3: /* colset ::= STRING */
#line 101 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
}
#line 1035 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 4: /* colset ::= MINUS STRING */
#line 104 "fts5parse.y"
{
  fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
  fts5yymsp[-1].minor.fts5yy11 = sqlite3Fts5ParseColsetInvert(pParse, fts5yymsp[-1].minor.fts5yy11);
}
#line 1044 "fts5parse.sql"
        break;
      case 5: /* colsetlist ::= colsetlist STRING */
#line 109 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, fts5yymsp[-1].minor.fts5yy11, &fts5yymsp[0].minor.fts5yy0); }
#line 1050 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 6: /* colsetlist ::= STRING */
#line 111 "fts5parse.y"
{
  fts5yylhsminor.fts5yy11 = sqlite3Fts5ParseColset(pParse, 0, &fts5yymsp[0].minor.fts5yy0);
}
#line 1058 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy11 = fts5yylhsminor.fts5yy11;
        break;
      case 7: /* expr ::= expr AND expr */
#line 115 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_AND, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1066 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 8: /* expr ::= expr OR expr */
#line 118 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_OR, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1074 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 9: /* expr ::= expr NOT expr */
#line 121 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_NOT, fts5yymsp[-2].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24, 0);
}
#line 1082 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 10: /* expr ::= colset COLON LP expr RP */
#line 125 "fts5parse.y"
{
  sqlite3Fts5ParseSetColset(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[-4].minor.fts5yy11);
  fts5yylhsminor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;
}
#line 1091 "fts5parse.sql"
  fts5yymsp[-4].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 11: /* expr ::= LP expr RP */
#line 129 "fts5parse.y"
{fts5yymsp[-2].minor.fts5yy24 = fts5yymsp[-1].minor.fts5yy24;}
#line 1097 "fts5parse.sql"
        break;
      case 12: /* expr ::= exprlist */
      case 13: /* exprlist ::= cnearset */ fts5yytestcase(fts5yyruleno==13);
#line 130 "fts5parse.y"
{fts5yylhsminor.fts5yy24 = fts5yymsp[0].minor.fts5yy24;}
#line 1103 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 14: /* exprlist ::= exprlist cnearset */
#line 133 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseImplicitAnd(pParse, fts5yymsp[-1].minor.fts5yy24, fts5yymsp[0].minor.fts5yy24);
}
#line 1111 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 15: /* cnearset ::= nearset */
#line 137 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
}
#line 1119 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 16: /* cnearset ::= colset COLON nearset */
#line 140 "fts5parse.y"
{
  fts5yylhsminor.fts5yy24 = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, fts5yymsp[0].minor.fts5yy46);
  sqlite3Fts5ParseSetColset(pParse, fts5yylhsminor.fts5yy24, fts5yymsp[-2].minor.fts5yy11);
}
#line 1128 "fts5parse.sql"
  fts5yymsp[-2].minor.fts5yy24 = fts5yylhsminor.fts5yy24;
        break;
      case 17: /* nearset ::= phrase */
#line 151 "fts5parse.y"
{ fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53); }
#line 1134 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 18: /* nearset ::= CARET phrase */
#line 152 "fts5parse.y"
{
  sqlite3Fts5ParseSetCaret(fts5yymsp[0].minor.fts5yy53);
  fts5yymsp[-1].minor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
}
#line 1143 "fts5parse.sql"
        break;
      case 19: /* nearset ::= STRING LP nearphrases neardist_opt RP */
#line 156 "fts5parse.y"
{
  sqlite3Fts5ParseNear(pParse, &fts5yymsp[-4].minor.fts5yy0);
  sqlite3Fts5ParseSetDistance(pParse, fts5yymsp[-2].minor.fts5yy46, &fts5yymsp[-1].minor.fts5yy0);
  fts5yylhsminor.fts5yy46 = fts5yymsp[-2].minor.fts5yy46;
}
#line 1152 "fts5parse.sql"
  fts5yymsp[-4].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 20: /* nearphrases ::= phrase */
#line 162 "fts5parse.y"
{
  fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, 0, fts5yymsp[0].minor.fts5yy53);
}
#line 1160 "fts5parse.sql"
  fts5yymsp[0].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 21: /* nearphrases ::= nearphrases phrase */
#line 165 "fts5parse.y"
{
  fts5yylhsminor.fts5yy46 = sqlite3Fts5ParseNearset(pParse, fts5yymsp[-1].minor.fts5yy46, fts5yymsp[0].minor.fts5yy53);
}
#line 1168 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy46 = fts5yylhsminor.fts5yy46;
        break;
      case 22: /* neardist_opt ::= */
      /* Empty right-hand side: the value goes into fts5yymsp[1], one entry
      ** above the current top of stack. */
#line 172 "fts5parse.y"
{ fts5yymsp[1].minor.fts5yy0.p = 0; fts5yymsp[1].minor.fts5yy0.n = 0; }
#line 1174 "fts5parse.sql"
        break;
      case 23: /* neardist_opt ::= COMMA STRING */
#line 173 "fts5parse.y"
{ fts5yymsp[-1].minor.fts5yy0 = fts5yymsp[0].minor.fts5yy0; }
#line 1179 "fts5parse.sql"
        break;
      case 24: /* phrase ::= phrase PLUS STRING star_opt */
#line 185 "fts5parse.y"
{
  fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, fts5yymsp[-3].minor.fts5yy53, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
}
#line 1186 "fts5parse.sql"
  fts5yymsp[-3].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
        break;
      case 25: /* phrase ::= STRING star_opt */
#line 188 "fts5parse.y"
{
  fts5yylhsminor.fts5yy53 = sqlite3Fts5ParseTerm(pParse, 0, &fts5yymsp[-1].minor.fts5yy0, fts5yymsp[0].minor.fts5yy4);
}
#line 1194 "fts5parse.sql"
  fts5yymsp[-1].minor.fts5yy53 = fts5yylhsminor.fts5yy53;
        break;
      case 26: /* star_opt ::= STAR */
#line 196 "fts5parse.y"
{ fts5yymsp[0].minor.fts5yy4 = 1; }
#line 1200 "fts5parse.sql"
        break;
      case 27: /* star_opt ::= */
      /* Empty right-hand side: as for rule 22, the value goes to fts5yymsp[1]. */
#line 197 "fts5parse.y"
{ fts5yymsp[1].minor.fts5yy4 = 0; }
#line 1205 "fts5parse.sql"
        break;
      default:
        break;
/********** End reduce actions ************************************************/
  };
  assert( fts5yyruleno<sizeof(fts5yyRuleInfoLhs)/sizeof(fts5yyRuleInfoLhs[0]) );
  fts5yygoto = fts5yyRuleInfoLhs[fts5yyruleno];
  /* NOTE(review): the actions above store results at non-positive offsets
  ** (e.g. fts5yymsp[-2] for a three-symbol rule), which is consistent with
  ** fts5yyRuleInfoNRhs[] holding the negated right-hand-side length, so
  ** that fts5yymsp[fts5yysize] is the entry below the symbols being
  ** reduced - confirm against the table definition. */
  fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
  fts5yyact = fts5yy_find_reduce_action(fts5yymsp[fts5yysize].stateno,(fts5YYCODETYPE)fts5yygoto);

  /* There are no SHIFTREDUCE actions on nonterminals because the table
  ** generator has simplified them to pure REDUCE actions. */
  assert( !(fts5yyact>fts5YY_MAX_SHIFT && fts5yyact<=fts5YY_MAX_SHIFTREDUCE) );

  /* It is not possible for a REDUCE to be followed by an error */
  assert( fts5yyact!=fts5YY_ERROR_ACTION );

  /* Make the entry that now holds the left-hand-side value the new top of
  ** stack and record the new state and symbol in it. */
  fts5yymsp += fts5yysize+1;
  fts5yypParser->fts5yytos = fts5yymsp;
  fts5yymsp->stateno = (fts5YYACTIONTYPE)fts5yyact;
  fts5yymsp->major = (fts5YYCODETYPE)fts5yygoto;
  fts5yyTraceShift(fts5yypParser, fts5yyact, "... then shift");
  return fts5yyact;
}

/*
** The following code executes when the parse fails
*/
#ifndef fts5YYNOERRORRECOVERY
static void fts5yy_parse_failed(
  fts5yyParser *fts5yypParser           /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sFail!\n",fts5yyTracePrompt);
  }
#endif
  /* Pop every entry above the base of the stack. */
  while( fts5yypParser->fts5yytos>fts5yypParser->fts5yystack ) fts5yy_pop_parser_stack(fts5yypParser);
  /* Here code is inserted which will be executed whenever the
  ** parser fails */
/************ Begin %parse_failure code ***************************************/
/************ End %parse_failure code *****************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}
#endif /* fts5YYNOERRORRECOVERY */

/*
** The following code executes when a syntax error first occurs.
*/
static void fts5yy_syntax_error(
  fts5yyParser *fts5yypParser,           /* The parser */
  int fts5yymajor,                   /* The major type of the error token */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor         /* The minor type of the error token */
){
  /* NOTE(review): pParse, used below, is not declared in this function;
  ** presumably one of the FETCH macros introduces it - confirm. */
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
/* Lets the %syntax_error code below refer to the offending token's value
** as FTS5TOKEN. */
#define FTS5TOKEN fts5yyminor
/************ Begin %syntax_error code ****************************************/
#line 30 "fts5parse.y"

  UNUSED_PARAM(fts5yymajor); /* Silence a compiler warning */
  /* Report the error through sqlite3Fts5ParseError(); "%.*s" prints the
  ** FTS5TOKEN.n bytes starting at FTS5TOKEN.p. */
  sqlite3Fts5ParseError(
    pParse, "fts5: syntax error near \"%.*s\"",FTS5TOKEN.n,FTS5TOKEN.p
  );
#line 1273 "fts5parse.sql"
/************ End %syntax_error code ******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}

/*
** The following is executed when the parser accepts
*/
static void fts5yy_accept(
  fts5yyParser *fts5yypParser           /* The parser */
){
  sqlite3Fts5ParserARG_FETCH
  sqlite3Fts5ParserCTX_FETCH
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    fprintf(fts5yyTraceFILE,"%sAccept!\n",fts5yyTracePrompt);
  }
#endif
#ifndef fts5YYNOERRORRECOVERY
  fts5yypParser->fts5yyerrcnt = -1;
#endif
  /* The stack is expected to be back at its base entry when the parse is
  ** accepted. */
  assert( fts5yypParser->fts5yytos==fts5yypParser->fts5yystack );
  /* Here code is inserted which will be executed whenever the
  ** parser accepts */
/*********** Begin %parse_accept code *****************************************/
/*********** End %parse_accept code *******************************************/
  sqlite3Fts5ParserARG_STORE /* Suppress warning about unused %extra_argument variable */
  sqlite3Fts5ParserCTX_STORE
}

/* The main parser program.
** The first argument is a pointer to a structure obtained from
** "sqlite3Fts5ParserAlloc" which describes the current state of the parser.
** The second argument is the major token number. The third is
** the minor token. The fourth optional argument is whatever the
** user wants (and specified in the grammar) and is available for
** use by the action routines.
**
** Inputs:
** <ul>
** <li> A pointer to the parser (an opaque structure.)
** <li> The major token number.
** <li> The minor token number.
** <li> An optional argument of a grammar-specified type.
** </ul>
**
** Outputs:
** None.
*/
static void sqlite3Fts5Parser(
  void *fts5yyp,                   /* The parser */
  int fts5yymajor,                 /* The major token code number */
  sqlite3Fts5ParserFTS5TOKENTYPE fts5yyminor       /* The value for the token */
  sqlite3Fts5ParserARG_PDECL               /* Optional %extra_argument parameter */
){
  fts5YYMINORTYPE fts5yyminorunion;
  fts5YYACTIONTYPE fts5yyact;   /* The parser action. */
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  int fts5yyendofinput;     /* True if we are at the end of input */
#endif
#ifdef fts5YYERRORSYMBOL
  int fts5yyerrorhit = 0;   /* True if fts5yymajor has invoked an error */
#endif
  fts5yyParser *fts5yypParser = (fts5yyParser*)fts5yyp;  /* The parser */
  sqlite3Fts5ParserCTX_FETCH
  sqlite3Fts5ParserARG_STORE

  assert( fts5yypParser->fts5yytos!=0 );
#if !defined(fts5YYERRORSYMBOL) && !defined(fts5YYNOERRORRECOVERY)
  /* A major token code of 0 is treated as end of input. */
  fts5yyendofinput = (fts5yymajor==0);
#endif

  fts5yyact = fts5yypParser->fts5yytos->stateno;
#ifndef NDEBUG
  if( fts5yyTraceFILE ){
    if( fts5yyact < fts5YY_MIN_REDUCE ){
      fprintf(fts5yyTraceFILE,"%sInput '%s' in state %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact);
    }else{
      fprintf(fts5yyTraceFILE,"%sInput '%s' with pending reduce %d\n",
              fts5yyTracePrompt,fts5yyTokenName[fts5yymajor],fts5yyact-fts5YY_MIN_REDUCE);
    }
  }
#endif

  /* Each pass looks up the action for the current token. A reduce loops
  ** again with the same token; a shift, an accept, or (in two of the three
  ** error-handling variants) a syntax error leaves the loop. */
  while(1){ /* Exit by "break" */
    assert( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystack );
    assert( fts5yyact==fts5yypParser->fts5yytos->stateno );
    fts5yyact = fts5yy_find_shift_action((fts5YYCODETYPE)fts5yymajor,fts5yyact);
    if( fts5yyact >= fts5YY_MIN_REDUCE ){
      unsigned int fts5yyruleno = fts5yyact - fts5YY_MIN_REDUCE; /* Reduce by this rule */
#ifndef NDEBUG
      assert( fts5yyruleno<(int)(sizeof(fts5yyRuleName)/sizeof(fts5yyRuleName[0])) );
      if( fts5yyTraceFILE ){
        int fts5yysize = fts5yyRuleInfoNRhs[fts5yyruleno];
        if( fts5yysize ){
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s, pop back to state %d.\n",
            fts5yyTracePrompt,
            fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action",
            fts5yypParser->fts5yytos[fts5yysize].stateno);
        }else{
          fprintf(fts5yyTraceFILE, "%sReduce %d [%s]%s.\n",
            fts5yyTracePrompt, fts5yyruleno, fts5yyRuleName[fts5yyruleno],
            fts5yyruleno<fts5YYNRULE_WITH_ACTION ? "" : " without external action");
        }
      }
#endif /* NDEBUG */

      /* Check that the stack is large enough to grow by a single entry
      ** if the RHS of the rule is empty. This ensures that there is room
      ** enough on the stack to push the LHS value */
      if( fts5yyRuleInfoNRhs[fts5yyruleno]==0 ){
#ifdef fts5YYTRACKMAXSTACKDEPTH
        if( (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack)>fts5yypParser->fts5yyhwm ){
          fts5yypParser->fts5yyhwm++;
          assert( fts5yypParser->fts5yyhwm ==
                  (int)(fts5yypParser->fts5yytos - fts5yypParser->fts5yystack));
        }
#endif
#if fts5YYSTACKDEPTH>0
        if( fts5yypParser->fts5yytos>=fts5yypParser->fts5yystackEnd ){
          fts5yyStackOverflow(fts5yypParser);
          break;
        }
#else
        if( fts5yypParser->fts5yytos>=&fts5yypParser->fts5yystack[fts5yypParser->fts5yystksz-1] ){
          if( fts5yyGrowStack(fts5yypParser) ){
            fts5yyStackOverflow(fts5yypParser);
            break;
          }
        }
#endif
      }
      fts5yyact = fts5yy_reduce(fts5yypParser,fts5yyruleno,fts5yymajor,fts5yyminor sqlite3Fts5ParserCTX_PARAM);
    }else if( fts5yyact <= fts5YY_MAX_SHIFTREDUCE ){
      fts5yy_shift(fts5yypParser,fts5yyact,(fts5YYCODETYPE)fts5yymajor,fts5yyminor);
#ifndef fts5YYNOERRORRECOVERY
      fts5yypParser->fts5yyerrcnt--;
#endif
      break;
    }else if( fts5yyact==fts5YY_ACCEPT_ACTION ){
      fts5yypParser->fts5yytos--;
      fts5yy_accept(fts5yypParser);
      return;
    }else{
      assert( fts5yyact == fts5YY_ERROR_ACTION );
      /* Copy of the token value in the form fts5yy_destructor() takes. */
      fts5yyminorunion.fts5yy0 = fts5yyminor;
#ifdef fts5YYERRORSYMBOL
      int fts5yymx;
#endif
#ifndef NDEBUG
      if( fts5yyTraceFILE ){
        fprintf(fts5yyTraceFILE,"%sSyntax Error!\n",fts5yyTracePrompt);
      }
#endif
#ifdef fts5YYERRORSYMBOL
      /* A syntax error has occurred.
      ** The response to an error depends upon whether or not the
      ** grammar defines an error token "ERROR".
      **
      ** This is what we do if the grammar does define ERROR:
      **
      **  * Call the %syntax_error function.
      **
      **  * Begin popping the stack until we enter a state where
      **    it is legal to shift the error symbol, then shift
      **    the error symbol.
      **
      **  * Set the error count to three.
      **
      **  * Begin accepting and shifting new tokens. No new error
      **    processing will occur until three tokens have been
      **    shifted successfully.
      **
      */
      if( fts5yypParser->fts5yyerrcnt<0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor,fts5yyminor);
      }
      fts5yymx = fts5yypParser->fts5yytos->major;
      if( fts5yymx==fts5YYERRORSYMBOL || fts5yyerrorhit ){
#ifndef NDEBUG
        if( fts5yyTraceFILE ){
          fprintf(fts5yyTraceFILE,"%sDiscard input token %s\n",
             fts5yyTracePrompt,fts5yyTokenName[fts5yymajor]);
        }
#endif
        fts5yy_destructor(fts5yypParser, (fts5YYCODETYPE)fts5yymajor, &fts5yyminorunion);
        fts5yymajor = fts5YYNOCODE;
      }else{
        while( fts5yypParser->fts5yytos > fts5yypParser->fts5yystack ){
          fts5yyact = fts5yy_find_reduce_action(fts5yypParser->fts5yytos->stateno,
                                        fts5YYERRORSYMBOL);
          if( fts5yyact<=fts5YY_MAX_SHIFTREDUCE ) break;
          fts5yy_pop_parser_stack(fts5yypParser);
        }
        if( fts5yypParser->fts5yytos <= fts5yypParser->fts5yystack || fts5yymajor==0 ){
          fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
          fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
          fts5yypParser->fts5yyerrcnt = -1;
#endif
          fts5yymajor = fts5YYNOCODE;
        }else if( fts5yymx!=fts5YYERRORSYMBOL ){
          fts5yy_shift(fts5yypParser,fts5yyact,fts5YYERRORSYMBOL,fts5yyminor);
        }
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yyerrorhit = 1;
      if( fts5yymajor==fts5YYNOCODE ) break;
      fts5yyact = fts5yypParser->fts5yytos->stateno;
#elif defined(fts5YYNOERRORRECOVERY)
      /* If the fts5YYNOERRORRECOVERY macro is defined, then do not attempt to
      ** do any kind of error recovery. Instead, simply invoke the syntax
      ** error routine and continue going as if nothing had happened.
      **
      ** Applications can set this macro (for example inside %include) if
      ** they intend to abandon the parse upon the first syntax error seen.
      */
      fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      break;
#else  /* fts5YYERRORSYMBOL is not defined */
      /* This is what we do if the grammar does not define ERROR:
      **
      **  * Report an error message, and throw away the input token.
      **
      **  * If the input token is $, then fail the parse.
      **
      ** As before, subsequent error messages are suppressed until
      ** three input tokens have been successfully shifted.
      */
      if( fts5yypParser->fts5yyerrcnt<=0 ){
        fts5yy_syntax_error(fts5yypParser,fts5yymajor, fts5yyminor);
      }
      fts5yypParser->fts5yyerrcnt = 3;
      fts5yy_destructor(fts5yypParser,(fts5YYCODETYPE)fts5yymajor,&fts5yyminorunion);
      if( fts5yyendofinput ){
        fts5yy_parse_failed(fts5yypParser);
#ifndef fts5YYNOERRORRECOVERY
        fts5yypParser->fts5yyerrcnt = -1;
#endif
      }
      break;
#endif
    }
  }
#ifndef NDEBUG
  /* Trace output: list the major token of every stack entry above the base. */
  if( fts5yyTraceFILE ){
    fts5yyStackEntry *i;
    char cDiv = '[';
    fprintf(fts5yyTraceFILE,"%sReturn. Stack=",fts5yyTracePrompt);
    for(i=&fts5yypParser->fts5yystack[1]; i<=fts5yypParser->fts5yytos; i++){
      fprintf(fts5yyTraceFILE,"%c%s", cDiv, fts5yyTokenName[i->major]);
      cDiv = ' ';
    }
    fprintf(fts5yyTraceFILE,"]\n");
  }
#endif
  return;
}

/*
** Return the fallback token corresponding to canonical token iToken, or
** 0 if iToken has no fallback.
3004 */ 3005 static int sqlite3Fts5ParserFallback(int iToken){ 3006 #ifdef fts5YYFALLBACK 3007 assert( iToken<(int)(sizeof(fts5yyFallback)/sizeof(fts5yyFallback[0])) ); 3008 return fts5yyFallback[iToken]; 3009 #else 3010 (void)iToken; 3011 return 0; 3012 #endif 3013 } 3014 3015 #line 1 "fts5_aux.c" 3016 /* 3017 ** 2014 May 31 3018 ** 3019 ** The author disclaims copyright to this source code. In place of 3020 ** a legal notice, here is a blessing: 3021 ** 3022 ** May you do good and not evil. 3023 ** May you find forgiveness for yourself and forgive others. 3024 ** May you share freely, never taking more than you give. 3025 ** 3026 ****************************************************************************** 3027 */ 3028 3029 3030 /* #include "fts5Int.h" */ 3031 #include <math.h> /* amalgamator: keep */ 3032 3033 /* 3034 ** Object used to iterate through all "coalesced phrase instances" in 3035 ** a single column of the current row. If the phrase instances in the 3036 ** column being considered do not overlap, this object simply iterates 3037 ** through them. Or, if they do overlap (share one or more tokens in 3038 ** common), each set of overlapping instances is treated as a single 3039 ** match. See documentation for the highlight() auxiliary function for 3040 ** details. 
3041 ** 3042 ** Usage is: 3043 ** 3044 ** for(rc = fts5CInstIterNext(pApi, pFts, iCol, &iter); 3045 ** (rc==SQLITE_OK && 0==fts5CInstIterEof(&iter); 3046 ** rc = fts5CInstIterNext(&iter) 3047 ** ){ 3048 ** printf("instance starts at %d, ends at %d\n", iter.iStart, iter.iEnd); 3049 ** } 3050 ** 3051 */ 3052 typedef struct CInstIter CInstIter; 3053 struct CInstIter { 3054 const Fts5ExtensionApi *pApi; /* API offered by current FTS version */ 3055 Fts5Context *pFts; /* First arg to pass to pApi functions */ 3056 int iCol; /* Column to search */ 3057 int iInst; /* Next phrase instance index */ 3058 int nInst; /* Total number of phrase instances */ 3059 3060 /* Output variables */ 3061 int iStart; /* First token in coalesced phrase instance */ 3062 int iEnd; /* Last token in coalesced phrase instance */ 3063 }; 3064 3065 /* 3066 ** Advance the iterator to the next coalesced phrase instance. Return 3067 ** an SQLite error code if an error occurs, or SQLITE_OK otherwise. 3068 */ 3069 static int fts5CInstIterNext(CInstIter *pIter){ 3070 int rc = SQLITE_OK; 3071 pIter->iStart = -1; 3072 pIter->iEnd = -1; 3073 3074 while( rc==SQLITE_OK && pIter->iInst<pIter->nInst ){ 3075 int ip; int ic; int io; 3076 rc = pIter->pApi->xInst(pIter->pFts, pIter->iInst, &ip, &ic, &io); 3077 if( rc==SQLITE_OK ){ 3078 if( ic==pIter->iCol ){ 3079 int iEnd = io - 1 + pIter->pApi->xPhraseSize(pIter->pFts, ip); 3080 if( pIter->iStart<0 ){ 3081 pIter->iStart = io; 3082 pIter->iEnd = iEnd; 3083 }else if( io<=pIter->iEnd ){ 3084 if( iEnd>pIter->iEnd ) pIter->iEnd = iEnd; 3085 }else{ 3086 break; 3087 } 3088 } 3089 pIter->iInst++; 3090 } 3091 } 3092 3093 return rc; 3094 } 3095 3096 /* 3097 ** Initialize the iterator object indicated by the final parameter to 3098 ** iterate through coalesced phrase instances in column iCol. 
3099 */ 3100 static int fts5CInstIterInit( 3101 const Fts5ExtensionApi *pApi, 3102 Fts5Context *pFts, 3103 int iCol, 3104 CInstIter *pIter 3105 ){ 3106 int rc; 3107 3108 memset(pIter, 0, sizeof(CInstIter)); 3109 pIter->pApi = pApi; 3110 pIter->pFts = pFts; 3111 pIter->iCol = iCol; 3112 rc = pApi->xInstCount(pFts, &pIter->nInst); 3113 3114 if( rc==SQLITE_OK ){ 3115 rc = fts5CInstIterNext(pIter); 3116 } 3117 3118 return rc; 3119 } 3120 3121 3122 3123 /************************************************************************* 3124 ** Start of highlight() implementation. 3125 */ 3126 typedef struct HighlightContext HighlightContext; 3127 struct HighlightContext { 3128 CInstIter iter; /* Coalesced Instance Iterator */ 3129 int iPos; /* Current token offset in zIn[] */ 3130 int iRangeStart; /* First token to include */ 3131 int iRangeEnd; /* If non-zero, last token to include */ 3132 const char *zOpen; /* Opening highlight */ 3133 const char *zClose; /* Closing highlight */ 3134 const char *zIn; /* Input text */ 3135 int nIn; /* Size of input text in bytes */ 3136 int iOff; /* Current offset within zIn[] */ 3137 char *zOut; /* Output value */ 3138 }; 3139 3140 /* 3141 ** Append text to the HighlightContext output string - p->zOut. Argument 3142 ** z points to a buffer containing n bytes of text to append. If n is 3143 ** negative, everything up until the first '\0' is appended to the output. 3144 ** 3145 ** If *pRc is set to any value other than SQLITE_OK when this function is 3146 ** called, it is a no-op. If an error (i.e. an OOM condition) is encountered, 3147 ** *pRc is set to an error code before returning. 
3148 */ 3149 static void fts5HighlightAppend( 3150 int *pRc, 3151 HighlightContext *p, 3152 const char *z, int n 3153 ){ 3154 if( *pRc==SQLITE_OK && z ){ 3155 if( n<0 ) n = (int)strlen(z); 3156 p->zOut = sqlite3_mprintf("%z%.*s", p->zOut, n, z); 3157 if( p->zOut==0 ) *pRc = SQLITE_NOMEM; 3158 } 3159 } 3160 3161 /* 3162 ** Tokenizer callback used by implementation of highlight() function. 3163 */ 3164 static int fts5HighlightCb( 3165 void *pContext, /* Pointer to HighlightContext object */ 3166 int tflags, /* Mask of FTS5_TOKEN_* flags */ 3167 const char *pToken, /* Buffer containing token */ 3168 int nToken, /* Size of token in bytes */ 3169 int iStartOff, /* Start offset of token */ 3170 int iEndOff /* End offset of token */ 3171 ){ 3172 HighlightContext *p = (HighlightContext*)pContext; 3173 int rc = SQLITE_OK; 3174 int iPos; 3175 3176 UNUSED_PARAM2(pToken, nToken); 3177 3178 if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK; 3179 iPos = p->iPos++; 3180 3181 if( p->iRangeEnd>0 ){ 3182 if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK; 3183 if( p->iRangeStart && iPos==p->iRangeStart ) p->iOff = iStartOff; 3184 } 3185 3186 if( iPos==p->iter.iStart ){ 3187 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iStartOff - p->iOff); 3188 fts5HighlightAppend(&rc, p, p->zOpen, -1); 3189 p->iOff = iStartOff; 3190 } 3191 3192 if( iPos==p->iter.iEnd ){ 3193 if( p->iRangeEnd && p->iter.iStart<p->iRangeStart ){ 3194 fts5HighlightAppend(&rc, p, p->zOpen, -1); 3195 } 3196 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); 3197 fts5HighlightAppend(&rc, p, p->zClose, -1); 3198 p->iOff = iEndOff; 3199 if( rc==SQLITE_OK ){ 3200 rc = fts5CInstIterNext(&p->iter); 3201 } 3202 } 3203 3204 if( p->iRangeEnd>0 && iPos==p->iRangeEnd ){ 3205 fts5HighlightAppend(&rc, p, &p->zIn[p->iOff], iEndOff - p->iOff); 3206 p->iOff = iEndOff; 3207 if( iPos>=p->iter.iStart && iPos<p->iter.iEnd ){ 3208 fts5HighlightAppend(&rc, p, p->zClose, -1); 3209 } 3210 } 3211 3212 return rc; 
3213 } 3214 3215 /* 3216 ** Implementation of highlight() function. 3217 */ 3218 static void fts5HighlightFunction( 3219 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 3220 Fts5Context *pFts, /* First arg to pass to pApi functions */ 3221 sqlite3_context *pCtx, /* Context for returning result/error */ 3222 int nVal, /* Number of values in apVal[] array */ 3223 sqlite3_value **apVal /* Array of trailing arguments */ 3224 ){ 3225 HighlightContext ctx; 3226 int rc; 3227 int iCol; 3228 3229 if( nVal!=3 ){ 3230 const char *zErr = "wrong number of arguments to function highlight()"; 3231 sqlite3_result_error(pCtx, zErr, -1); 3232 return; 3233 } 3234 3235 iCol = sqlite3_value_int(apVal[0]); 3236 memset(&ctx, 0, sizeof(HighlightContext)); 3237 ctx.zOpen = (const char*)sqlite3_value_text(apVal[1]); 3238 ctx.zClose = (const char*)sqlite3_value_text(apVal[2]); 3239 rc = pApi->xColumnText(pFts, iCol, &ctx.zIn, &ctx.nIn); 3240 3241 if( ctx.zIn ){ 3242 if( rc==SQLITE_OK ){ 3243 rc = fts5CInstIterInit(pApi, pFts, iCol, &ctx.iter); 3244 } 3245 3246 if( rc==SQLITE_OK ){ 3247 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); 3248 } 3249 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); 3250 3251 if( rc==SQLITE_OK ){ 3252 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); 3253 } 3254 sqlite3_free(ctx.zOut); 3255 } 3256 if( rc!=SQLITE_OK ){ 3257 sqlite3_result_error_code(pCtx, rc); 3258 } 3259 } 3260 /* 3261 ** End of highlight() implementation. 3262 **************************************************************************/ 3263 3264 /* 3265 ** Context object passed to the fts5SentenceFinderCb() function. 
3266 */ 3267 typedef struct Fts5SFinder Fts5SFinder; 3268 struct Fts5SFinder { 3269 int iPos; /* Current token position */ 3270 int nFirstAlloc; /* Allocated size of aFirst[] */ 3271 int nFirst; /* Number of entries in aFirst[] */ 3272 int *aFirst; /* Array of first token in each sentence */ 3273 const char *zDoc; /* Document being tokenized */ 3274 }; 3275 3276 /* 3277 ** Add an entry to the Fts5SFinder.aFirst[] array. Grow the array if 3278 ** necessary. Return SQLITE_OK if successful, or SQLITE_NOMEM if an 3279 ** error occurs. 3280 */ 3281 static int fts5SentenceFinderAdd(Fts5SFinder *p, int iAdd){ 3282 if( p->nFirstAlloc==p->nFirst ){ 3283 int nNew = p->nFirstAlloc ? p->nFirstAlloc*2 : 64; 3284 int *aNew; 3285 3286 aNew = (int*)sqlite3_realloc64(p->aFirst, nNew*sizeof(int)); 3287 if( aNew==0 ) return SQLITE_NOMEM; 3288 p->aFirst = aNew; 3289 p->nFirstAlloc = nNew; 3290 } 3291 p->aFirst[p->nFirst++] = iAdd; 3292 return SQLITE_OK; 3293 } 3294 3295 /* 3296 ** This function is an xTokenize() callback used by the auxiliary snippet() 3297 ** function. Its job is to identify tokens that are the first in a sentence. 3298 ** For each such token, an entry is added to the SFinder.aFirst[] array. 3299 */ 3300 static int fts5SentenceFinderCb( 3301 void *pContext, /* Pointer to HighlightContext object */ 3302 int tflags, /* Mask of FTS5_TOKEN_* flags */ 3303 const char *pToken, /* Buffer containing token */ 3304 int nToken, /* Size of token in bytes */ 3305 int iStartOff, /* Start offset of token */ 3306 int iEndOff /* End offset of token */ 3307 ){ 3308 int rc = SQLITE_OK; 3309 3310 UNUSED_PARAM2(pToken, nToken); 3311 UNUSED_PARAM(iEndOff); 3312 3313 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ 3314 Fts5SFinder *p = (Fts5SFinder*)pContext; 3315 if( p->iPos>0 ){ 3316 int i; 3317 char c = 0; 3318 for(i=iStartOff-1; i>=0; i--){ 3319 c = p->zDoc[i]; 3320 if( c!=' ' && c!='\t' && c!='\n' && c!='\r' ) break; 3321 } 3322 if( i!=iStartOff-1 && (c=='.' 
|| c==':') ){ 3323 rc = fts5SentenceFinderAdd(p, p->iPos); 3324 } 3325 }else{ 3326 rc = fts5SentenceFinderAdd(p, 0); 3327 } 3328 p->iPos++; 3329 } 3330 return rc; 3331 } 3332 3333 static int fts5SnippetScore( 3334 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 3335 Fts5Context *pFts, /* First arg to pass to pApi functions */ 3336 int nDocsize, /* Size of column in tokens */ 3337 unsigned char *aSeen, /* Array with one element per query phrase */ 3338 int iCol, /* Column to score */ 3339 int iPos, /* Starting offset to score */ 3340 int nToken, /* Max tokens per snippet */ 3341 int *pnScore, /* OUT: Score */ 3342 int *piPos /* OUT: Adjusted offset */ 3343 ){ 3344 int rc; 3345 int i; 3346 int ip = 0; 3347 int ic = 0; 3348 int iOff = 0; 3349 int iFirst = -1; 3350 int nInst; 3351 int nScore = 0; 3352 int iLast = 0; 3353 sqlite3_int64 iEnd = (sqlite3_int64)iPos + nToken; 3354 3355 rc = pApi->xInstCount(pFts, &nInst); 3356 for(i=0; i<nInst && rc==SQLITE_OK; i++){ 3357 rc = pApi->xInst(pFts, i, &ip, &ic, &iOff); 3358 if( rc==SQLITE_OK && ic==iCol && iOff>=iPos && iOff<iEnd ){ 3359 nScore += (aSeen[ip] ? 1 : 1000); 3360 aSeen[ip] = 1; 3361 if( iFirst<0 ) iFirst = iOff; 3362 iLast = iOff + pApi->xPhraseSize(pFts, ip); 3363 } 3364 } 3365 3366 *pnScore = nScore; 3367 if( piPos ){ 3368 sqlite3_int64 iAdj = iFirst - (nToken - (iLast-iFirst)) / 2; 3369 if( (iAdj+nToken)>nDocsize ) iAdj = nDocsize - nToken; 3370 if( iAdj<0 ) iAdj = 0; 3371 *piPos = (int)iAdj; 3372 } 3373 3374 return rc; 3375 } 3376 3377 /* 3378 ** Return the value in pVal interpreted as utf-8 text. Except, if pVal 3379 ** contains a NULL value, return a pointer to a static string zero 3380 ** bytes in length instead of a NULL pointer. 3381 */ 3382 static const char *fts5ValueToText(sqlite3_value *pVal){ 3383 const char *zRet = (const char*)sqlite3_value_text(pVal); 3384 return zRet ? zRet : ""; 3385 } 3386 3387 /* 3388 ** Implementation of snippet() function. 
3389 */ 3390 static void fts5SnippetFunction( 3391 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 3392 Fts5Context *pFts, /* First arg to pass to pApi functions */ 3393 sqlite3_context *pCtx, /* Context for returning result/error */ 3394 int nVal, /* Number of values in apVal[] array */ 3395 sqlite3_value **apVal /* Array of trailing arguments */ 3396 ){ 3397 HighlightContext ctx; 3398 int rc = SQLITE_OK; /* Return code */ 3399 int iCol; /* 1st argument to snippet() */ 3400 const char *zEllips; /* 4th argument to snippet() */ 3401 int nToken; /* 5th argument to snippet() */ 3402 int nInst = 0; /* Number of instance matches this row */ 3403 int i; /* Used to iterate through instances */ 3404 int nPhrase; /* Number of phrases in query */ 3405 unsigned char *aSeen; /* Array of "seen instance" flags */ 3406 int iBestCol; /* Column containing best snippet */ 3407 int iBestStart = 0; /* First token of best snippet */ 3408 int nBestScore = 0; /* Score of best snippet */ 3409 int nColSize = 0; /* Total size of iBestCol in tokens */ 3410 Fts5SFinder sFinder; /* Used to find the beginnings of sentences */ 3411 int nCol; 3412 3413 if( nVal!=5 ){ 3414 const char *zErr = "wrong number of arguments to function snippet()"; 3415 sqlite3_result_error(pCtx, zErr, -1); 3416 return; 3417 } 3418 3419 nCol = pApi->xColumnCount(pFts); 3420 memset(&ctx, 0, sizeof(HighlightContext)); 3421 iCol = sqlite3_value_int(apVal[0]); 3422 ctx.zOpen = fts5ValueToText(apVal[1]); 3423 ctx.zClose = fts5ValueToText(apVal[2]); 3424 zEllips = fts5ValueToText(apVal[3]); 3425 nToken = sqlite3_value_int(apVal[4]); 3426 3427 iBestCol = (iCol>=0 ? 
iCol : 0); 3428 nPhrase = pApi->xPhraseCount(pFts); 3429 aSeen = sqlite3_malloc(nPhrase); 3430 if( aSeen==0 ){ 3431 rc = SQLITE_NOMEM; 3432 } 3433 if( rc==SQLITE_OK ){ 3434 rc = pApi->xInstCount(pFts, &nInst); 3435 } 3436 3437 memset(&sFinder, 0, sizeof(Fts5SFinder)); 3438 for(i=0; i<nCol; i++){ 3439 if( iCol<0 || iCol==i ){ 3440 int nDoc; 3441 int nDocsize; 3442 int ii; 3443 sFinder.iPos = 0; 3444 sFinder.nFirst = 0; 3445 rc = pApi->xColumnText(pFts, i, &sFinder.zDoc, &nDoc); 3446 if( rc!=SQLITE_OK ) break; 3447 rc = pApi->xTokenize(pFts, 3448 sFinder.zDoc, nDoc, (void*)&sFinder,fts5SentenceFinderCb 3449 ); 3450 if( rc!=SQLITE_OK ) break; 3451 rc = pApi->xColumnSize(pFts, i, &nDocsize); 3452 if( rc!=SQLITE_OK ) break; 3453 3454 for(ii=0; rc==SQLITE_OK && ii<nInst; ii++){ 3455 int ip, ic, io; 3456 int iAdj; 3457 int nScore; 3458 int jj; 3459 3460 rc = pApi->xInst(pFts, ii, &ip, &ic, &io); 3461 if( ic!=i ) continue; 3462 if( io>nDocsize ) rc = FTS5_CORRUPT; 3463 if( rc!=SQLITE_OK ) continue; 3464 memset(aSeen, 0, nPhrase); 3465 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 3466 io, nToken, &nScore, &iAdj 3467 ); 3468 if( rc==SQLITE_OK && nScore>nBestScore ){ 3469 nBestScore = nScore; 3470 iBestCol = i; 3471 iBestStart = iAdj; 3472 nColSize = nDocsize; 3473 } 3474 3475 if( rc==SQLITE_OK && sFinder.nFirst && nDocsize>nToken ){ 3476 for(jj=0; jj<(sFinder.nFirst-1); jj++){ 3477 if( sFinder.aFirst[jj+1]>io ) break; 3478 } 3479 3480 if( sFinder.aFirst[jj]<io ){ 3481 memset(aSeen, 0, nPhrase); 3482 rc = fts5SnippetScore(pApi, pFts, nDocsize, aSeen, i, 3483 sFinder.aFirst[jj], nToken, &nScore, 0 3484 ); 3485 3486 nScore += (sFinder.aFirst[jj]==0 ? 
120 : 100); 3487 if( rc==SQLITE_OK && nScore>nBestScore ){ 3488 nBestScore = nScore; 3489 iBestCol = i; 3490 iBestStart = sFinder.aFirst[jj]; 3491 nColSize = nDocsize; 3492 } 3493 } 3494 } 3495 } 3496 } 3497 } 3498 3499 if( rc==SQLITE_OK ){ 3500 rc = pApi->xColumnText(pFts, iBestCol, &ctx.zIn, &ctx.nIn); 3501 } 3502 if( rc==SQLITE_OK && nColSize==0 ){ 3503 rc = pApi->xColumnSize(pFts, iBestCol, &nColSize); 3504 } 3505 if( ctx.zIn ){ 3506 if( rc==SQLITE_OK ){ 3507 rc = fts5CInstIterInit(pApi, pFts, iBestCol, &ctx.iter); 3508 } 3509 3510 ctx.iRangeStart = iBestStart; 3511 ctx.iRangeEnd = iBestStart + nToken - 1; 3512 3513 if( iBestStart>0 ){ 3514 fts5HighlightAppend(&rc, &ctx, zEllips, -1); 3515 } 3516 3517 /* Advance iterator ctx.iter so that it points to the first coalesced 3518 ** phrase instance at or following position iBestStart. */ 3519 while( ctx.iter.iStart>=0 && ctx.iter.iStart<iBestStart && rc==SQLITE_OK ){ 3520 rc = fts5CInstIterNext(&ctx.iter); 3521 } 3522 3523 if( rc==SQLITE_OK ){ 3524 rc = pApi->xTokenize(pFts, ctx.zIn, ctx.nIn, (void*)&ctx,fts5HighlightCb); 3525 } 3526 if( ctx.iRangeEnd>=(nColSize-1) ){ 3527 fts5HighlightAppend(&rc, &ctx, &ctx.zIn[ctx.iOff], ctx.nIn - ctx.iOff); 3528 }else{ 3529 fts5HighlightAppend(&rc, &ctx, zEllips, -1); 3530 } 3531 } 3532 if( rc==SQLITE_OK ){ 3533 sqlite3_result_text(pCtx, (const char*)ctx.zOut, -1, SQLITE_TRANSIENT); 3534 }else{ 3535 sqlite3_result_error_code(pCtx, rc); 3536 } 3537 sqlite3_free(ctx.zOut); 3538 sqlite3_free(aSeen); 3539 sqlite3_free(sFinder.aFirst); 3540 } 3541 3542 /************************************************************************/ 3543 3544 /* 3545 ** The first time the bm25() function is called for a query, an instance 3546 ** of the following structure is allocated and populated. 
3547 */ 3548 typedef struct Fts5Bm25Data Fts5Bm25Data; 3549 struct Fts5Bm25Data { 3550 int nPhrase; /* Number of phrases in query */ 3551 double avgdl; /* Average number of tokens in each row */ 3552 double *aIDF; /* IDF for each phrase */ 3553 double *aFreq; /* Array used to calculate phrase freq. */ 3554 }; 3555 3556 /* 3557 ** Callback used by fts5Bm25GetData() to count the number of rows in the 3558 ** table matched by each individual phrase within the query. 3559 */ 3560 static int fts5CountCb( 3561 const Fts5ExtensionApi *pApi, 3562 Fts5Context *pFts, 3563 void *pUserData /* Pointer to sqlite3_int64 variable */ 3564 ){ 3565 sqlite3_int64 *pn = (sqlite3_int64*)pUserData; 3566 UNUSED_PARAM2(pApi, pFts); 3567 (*pn)++; 3568 return SQLITE_OK; 3569 } 3570 3571 /* 3572 ** Set *ppData to point to the Fts5Bm25Data object for the current query. 3573 ** If the object has not already been allocated, allocate and populate it 3574 ** now. 3575 */ 3576 static int fts5Bm25GetData( 3577 const Fts5ExtensionApi *pApi, 3578 Fts5Context *pFts, 3579 Fts5Bm25Data **ppData /* OUT: bm25-data object for this query */ 3580 ){ 3581 int rc = SQLITE_OK; /* Return code */ 3582 Fts5Bm25Data *p; /* Object to return */ 3583 3584 p = (Fts5Bm25Data*)pApi->xGetAuxdata(pFts, 0); 3585 if( p==0 ){ 3586 int nPhrase; /* Number of phrases in query */ 3587 sqlite3_int64 nRow = 0; /* Number of rows in table */ 3588 sqlite3_int64 nToken = 0; /* Number of tokens in table */ 3589 sqlite3_int64 nByte; /* Bytes of space to allocate */ 3590 int i; 3591 3592 /* Allocate the Fts5Bm25Data object */ 3593 nPhrase = pApi->xPhraseCount(pFts); 3594 nByte = sizeof(Fts5Bm25Data) + nPhrase*2*sizeof(double); 3595 p = (Fts5Bm25Data*)sqlite3_malloc64(nByte); 3596 if( p==0 ){ 3597 rc = SQLITE_NOMEM; 3598 }else{ 3599 memset(p, 0, (size_t)nByte); 3600 p->nPhrase = nPhrase; 3601 p->aIDF = (double*)&p[1]; 3602 p->aFreq = &p->aIDF[nPhrase]; 3603 } 3604 3605 /* Calculate the average document length for this FTS5 table */ 3606 if( 
rc==SQLITE_OK ) rc = pApi->xRowCount(pFts, &nRow); 3607 assert( rc!=SQLITE_OK || nRow>0 ); 3608 if( rc==SQLITE_OK ) rc = pApi->xColumnTotalSize(pFts, -1, &nToken); 3609 if( rc==SQLITE_OK ) p->avgdl = (double)nToken / (double)nRow; 3610 3611 /* Calculate an IDF for each phrase in the query */ 3612 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ 3613 sqlite3_int64 nHit = 0; 3614 rc = pApi->xQueryPhrase(pFts, i, (void*)&nHit, fts5CountCb); 3615 if( rc==SQLITE_OK ){ 3616 /* Calculate the IDF (Inverse Document Frequency) for phrase i. 3617 ** This is done using the standard BM25 formula as found on wikipedia: 3618 ** 3619 ** IDF = log( (N - nHit + 0.5) / (nHit + 0.5) ) 3620 ** 3621 ** where "N" is the total number of documents in the set and nHit 3622 ** is the number that contain at least one instance of the phrase 3623 ** under consideration. 3624 ** 3625 ** The problem with this is that if (N < 2*nHit), the IDF is 3626 ** negative. Which is undesirable. So the mimimum allowable IDF is 3627 ** (1e-6) - roughly the same as a term that appears in just over 3628 ** half of set of 5,000,000 documents. */ 3629 double idf = log( (nRow - nHit + 0.5) / (nHit + 0.5) ); 3630 if( idf<=0.0 ) idf = 1e-6; 3631 p->aIDF[i] = idf; 3632 } 3633 } 3634 3635 if( rc!=SQLITE_OK ){ 3636 sqlite3_free(p); 3637 }else{ 3638 rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); 3639 } 3640 if( rc!=SQLITE_OK ) p = 0; 3641 } 3642 *ppData = p; 3643 return rc; 3644 } 3645 3646 /* 3647 ** Implementation of bm25() function. 
3648 */ 3649 static void fts5Bm25Function( 3650 const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ 3651 Fts5Context *pFts, /* First arg to pass to pApi functions */ 3652 sqlite3_context *pCtx, /* Context for returning result/error */ 3653 int nVal, /* Number of values in apVal[] array */ 3654 sqlite3_value **apVal /* Array of trailing arguments */ 3655 ){ 3656 const double k1 = 1.2; /* Constant "k1" from BM25 formula */ 3657 const double b = 0.75; /* Constant "b" from BM25 formula */ 3658 int rc; /* Error code */ 3659 double score = 0.0; /* SQL function return value */ 3660 Fts5Bm25Data *pData; /* Values allocated/calculated once only */ 3661 int i; /* Iterator variable */ 3662 int nInst = 0; /* Value returned by xInstCount() */ 3663 double D = 0.0; /* Total number of tokens in row */ 3664 double *aFreq = 0; /* Array of phrase freq. for current row */ 3665 3666 /* Calculate the phrase frequency (symbol "f(qi,D)" in the documentation) 3667 ** for each phrase in the query for the current row. */ 3668 rc = fts5Bm25GetData(pApi, pFts, &pData); 3669 if( rc==SQLITE_OK ){ 3670 aFreq = pData->aFreq; 3671 memset(aFreq, 0, sizeof(double) * pData->nPhrase); 3672 rc = pApi->xInstCount(pFts, &nInst); 3673 } 3674 for(i=0; rc==SQLITE_OK && i<nInst; i++){ 3675 int ip; int ic; int io; 3676 rc = pApi->xInst(pFts, i, &ip, &ic, &io); 3677 if( rc==SQLITE_OK ){ 3678 double w = (nVal > ic) ? sqlite3_value_double(apVal[ic]) : 1.0; 3679 aFreq[ip] += w; 3680 } 3681 } 3682 3683 /* Figure out the total size of the current row in tokens. */ 3684 if( rc==SQLITE_OK ){ 3685 int nTok; 3686 rc = pApi->xColumnSize(pFts, -1, &nTok); 3687 D = (double)nTok; 3688 } 3689 3690 /* Determine and return the BM25 score for the current row. Or, if an 3691 ** error has occurred, throw an exception. 
*/ 3692 if( rc==SQLITE_OK ){ 3693 for(i=0; i<pData->nPhrase; i++){ 3694 score += pData->aIDF[i] * ( 3695 ( aFreq[i] * (k1 + 1.0) ) / 3696 ( aFreq[i] + k1 * (1 - b + b * D / pData->avgdl) ) 3697 ); 3698 } 3699 sqlite3_result_double(pCtx, -1.0 * score); 3700 }else{ 3701 sqlite3_result_error_code(pCtx, rc); 3702 } 3703 } 3704 3705 static int sqlite3Fts5AuxInit(fts5_api *pApi){ 3706 struct Builtin { 3707 const char *zFunc; /* Function name (nul-terminated) */ 3708 void *pUserData; /* User-data pointer */ 3709 fts5_extension_function xFunc;/* Callback function */ 3710 void (*xDestroy)(void*); /* Destructor function */ 3711 } aBuiltin [] = { 3712 { "snippet", 0, fts5SnippetFunction, 0 }, 3713 { "highlight", 0, fts5HighlightFunction, 0 }, 3714 { "bm25", 0, fts5Bm25Function, 0 }, 3715 }; 3716 int rc = SQLITE_OK; /* Return code */ 3717 int i; /* To iterate through builtin functions */ 3718 3719 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ 3720 rc = pApi->xCreateFunction(pApi, 3721 aBuiltin[i].zFunc, 3722 aBuiltin[i].pUserData, 3723 aBuiltin[i].xFunc, 3724 aBuiltin[i].xDestroy 3725 ); 3726 } 3727 3728 return rc; 3729 } 3730 3731 #line 1 "fts5_buffer.c" 3732 /* 3733 ** 2014 May 31 3734 ** 3735 ** The author disclaims copyright to this source code. In place of 3736 ** a legal notice, here is a blessing: 3737 ** 3738 ** May you do good and not evil. 3739 ** May you find forgiveness for yourself and forgive others. 3740 ** May you share freely, never taking more than you give. 3741 ** 3742 ****************************************************************************** 3743 */ 3744 3745 3746 3747 /* #include "fts5Int.h" */ 3748 3749 static int sqlite3Fts5BufferSize(int *pRc, Fts5Buffer *pBuf, u32 nByte){ 3750 if( (u32)pBuf->nSpace<nByte ){ 3751 u64 nNew = pBuf->nSpace ? 
pBuf->nSpace : 64; 3752 u8 *pNew; 3753 while( nNew<nByte ){ 3754 nNew = nNew * 2; 3755 } 3756 pNew = sqlite3_realloc64(pBuf->p, nNew); 3757 if( pNew==0 ){ 3758 *pRc = SQLITE_NOMEM; 3759 return 1; 3760 }else{ 3761 pBuf->nSpace = (int)nNew; 3762 pBuf->p = pNew; 3763 } 3764 } 3765 return 0; 3766 } 3767 3768 3769 /* 3770 ** Encode value iVal as an SQLite varint and append it to the buffer object 3771 ** pBuf. If an OOM error occurs, set the error code in p. 3772 */ 3773 static void sqlite3Fts5BufferAppendVarint(int *pRc, Fts5Buffer *pBuf, i64 iVal){ 3774 if( fts5BufferGrow(pRc, pBuf, 9) ) return; 3775 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iVal); 3776 } 3777 3778 static void sqlite3Fts5Put32(u8 *aBuf, int iVal){ 3779 aBuf[0] = (iVal>>24) & 0x00FF; 3780 aBuf[1] = (iVal>>16) & 0x00FF; 3781 aBuf[2] = (iVal>> 8) & 0x00FF; 3782 aBuf[3] = (iVal>> 0) & 0x00FF; 3783 } 3784 3785 static int sqlite3Fts5Get32(const u8 *aBuf){ 3786 return (int)((((u32)aBuf[0])<<24) + (aBuf[1]<<16) + (aBuf[2]<<8) + aBuf[3]); 3787 } 3788 3789 /* 3790 ** Append buffer nData/pData to buffer pBuf. If an OOM error occurs, set 3791 ** the error code in p. If an error has already occurred when this function 3792 ** is called, it is a no-op. 3793 */ 3794 static void sqlite3Fts5BufferAppendBlob( 3795 int *pRc, 3796 Fts5Buffer *pBuf, 3797 u32 nData, 3798 const u8 *pData 3799 ){ 3800 if( nData ){ 3801 if( fts5BufferGrow(pRc, pBuf, nData) ) return; 3802 memcpy(&pBuf->p[pBuf->n], pData, nData); 3803 pBuf->n += nData; 3804 } 3805 } 3806 3807 /* 3808 ** Append the nul-terminated string zStr to the buffer pBuf. This function 3809 ** ensures that the byte following the buffer data is set to 0x00, even 3810 ** though this byte is not included in the pBuf->n count. 
3811 */ 3812 static void sqlite3Fts5BufferAppendString( 3813 int *pRc, 3814 Fts5Buffer *pBuf, 3815 const char *zStr 3816 ){ 3817 int nStr = (int)strlen(zStr); 3818 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nStr+1, (const u8*)zStr); 3819 pBuf->n--; 3820 } 3821 3822 /* 3823 ** Argument zFmt is a printf() style format string. This function performs 3824 ** the printf() style processing, then appends the results to buffer pBuf. 3825 ** 3826 ** Like sqlite3Fts5BufferAppendString(), this function ensures that the byte 3827 ** following the buffer data is set to 0x00, even though this byte is not 3828 ** included in the pBuf->n count. 3829 */ 3830 static void sqlite3Fts5BufferAppendPrintf( 3831 int *pRc, 3832 Fts5Buffer *pBuf, 3833 char *zFmt, ... 3834 ){ 3835 if( *pRc==SQLITE_OK ){ 3836 char *zTmp; 3837 va_list ap; 3838 va_start(ap, zFmt); 3839 zTmp = sqlite3_vmprintf(zFmt, ap); 3840 va_end(ap); 3841 3842 if( zTmp==0 ){ 3843 *pRc = SQLITE_NOMEM; 3844 }else{ 3845 sqlite3Fts5BufferAppendString(pRc, pBuf, zTmp); 3846 sqlite3_free(zTmp); 3847 } 3848 } 3849 } 3850 3851 static char *sqlite3Fts5Mprintf(int *pRc, const char *zFmt, ...){ 3852 char *zRet = 0; 3853 if( *pRc==SQLITE_OK ){ 3854 va_list ap; 3855 va_start(ap, zFmt); 3856 zRet = sqlite3_vmprintf(zFmt, ap); 3857 va_end(ap); 3858 if( zRet==0 ){ 3859 *pRc = SQLITE_NOMEM; 3860 } 3861 } 3862 return zRet; 3863 } 3864 3865 3866 /* 3867 ** Free any buffer allocated by pBuf. Zero the structure before returning. 3868 */ 3869 static void sqlite3Fts5BufferFree(Fts5Buffer *pBuf){ 3870 sqlite3_free(pBuf->p); 3871 memset(pBuf, 0, sizeof(Fts5Buffer)); 3872 } 3873 3874 /* 3875 ** Zero the contents of the buffer object. But do not free the associated 3876 ** memory allocation. 3877 */ 3878 static void sqlite3Fts5BufferZero(Fts5Buffer *pBuf){ 3879 pBuf->n = 0; 3880 } 3881 3882 /* 3883 ** Set the buffer to contain nData/pData. If an OOM error occurs, leave an 3884 ** the error code in p. 
If an error has already occurred when this function 3885 ** is called, it is a no-op. 3886 */ 3887 static void sqlite3Fts5BufferSet( 3888 int *pRc, 3889 Fts5Buffer *pBuf, 3890 int nData, 3891 const u8 *pData 3892 ){ 3893 pBuf->n = 0; 3894 sqlite3Fts5BufferAppendBlob(pRc, pBuf, nData, pData); 3895 } 3896 3897 static int sqlite3Fts5PoslistNext64( 3898 const u8 *a, int n, /* Buffer containing poslist */ 3899 int *pi, /* IN/OUT: Offset within a[] */ 3900 i64 *piOff /* IN/OUT: Current offset */ 3901 ){ 3902 int i = *pi; 3903 if( i>=n ){ 3904 /* EOF */ 3905 *piOff = -1; 3906 return 1; 3907 }else{ 3908 i64 iOff = *piOff; 3909 u32 iVal; 3910 fts5FastGetVarint32(a, i, iVal); 3911 if( iVal<=1 ){ 3912 if( iVal==0 ){ 3913 *pi = i; 3914 return 0; 3915 } 3916 fts5FastGetVarint32(a, i, iVal); 3917 iOff = ((i64)iVal) << 32; 3918 assert( iOff>=0 ); 3919 fts5FastGetVarint32(a, i, iVal); 3920 if( iVal<2 ){ 3921 /* This is a corrupt record. So stop parsing it here. */ 3922 *piOff = -1; 3923 return 1; 3924 } 3925 *piOff = iOff + ((iVal-2) & 0x7FFFFFFF); 3926 }else{ 3927 *piOff = (iOff & (i64)0x7FFFFFFF<<32)+((iOff + (iVal-2)) & 0x7FFFFFFF); 3928 } 3929 *pi = i; 3930 assert_nc( *piOff>=iOff ); 3931 return 0; 3932 } 3933 } 3934 3935 3936 /* 3937 ** Advance the iterator object passed as the only argument. Return true 3938 ** if the iterator reaches EOF, or false otherwise. 
3939 */ 3940 static int sqlite3Fts5PoslistReaderNext(Fts5PoslistReader *pIter){ 3941 if( sqlite3Fts5PoslistNext64(pIter->a, pIter->n, &pIter->i, &pIter->iPos) ){ 3942 pIter->bEof = 1; 3943 } 3944 return pIter->bEof; 3945 } 3946 3947 static int sqlite3Fts5PoslistReaderInit( 3948 const u8 *a, int n, /* Poslist buffer to iterate through */ 3949 Fts5PoslistReader *pIter /* Iterator object to initialize */ 3950 ){ 3951 memset(pIter, 0, sizeof(*pIter)); 3952 pIter->a = a; 3953 pIter->n = n; 3954 sqlite3Fts5PoslistReaderNext(pIter); 3955 return pIter->bEof; 3956 } 3957 3958 /* 3959 ** Append position iPos to the position list being accumulated in buffer 3960 ** pBuf, which must be already be large enough to hold the new data. 3961 ** The previous position written to this list is *piPrev. *piPrev is set 3962 ** to iPos before returning. 3963 */ 3964 static void sqlite3Fts5PoslistSafeAppend( 3965 Fts5Buffer *pBuf, 3966 i64 *piPrev, 3967 i64 iPos 3968 ){ 3969 if( iPos>=*piPrev ){ 3970 static const i64 colmask = ((i64)(0x7FFFFFFF)) << 32; 3971 if( (iPos & colmask) != (*piPrev & colmask) ){ 3972 pBuf->p[pBuf->n++] = 1; 3973 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos>>32)); 3974 *piPrev = (iPos & colmask); 3975 } 3976 pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], (iPos-*piPrev)+2); 3977 *piPrev = iPos; 3978 } 3979 } 3980 3981 static int sqlite3Fts5PoslistWriterAppend( 3982 Fts5Buffer *pBuf, 3983 Fts5PoslistWriter *pWriter, 3984 i64 iPos 3985 ){ 3986 int rc = 0; /* Initialized only to suppress erroneous warning from Clang */ 3987 if( fts5BufferGrow(&rc, pBuf, 5+5+5) ) return rc; 3988 sqlite3Fts5PoslistSafeAppend(pBuf, &pWriter->iPrev, iPos); 3989 return SQLITE_OK; 3990 } 3991 3992 static void *sqlite3Fts5MallocZero(int *pRc, sqlite3_int64 nByte){ 3993 void *pRet = 0; 3994 if( *pRc==SQLITE_OK ){ 3995 pRet = sqlite3_malloc64(nByte); 3996 if( pRet==0 ){ 3997 if( nByte>0 ) *pRc = SQLITE_NOMEM; 3998 }else{ 3999 memset(pRet, 0, (size_t)nByte); 4000 } 4001 } 4002 
return pRet; 4003 } 4004 4005 /* 4006 ** Return a nul-terminated copy of the string indicated by pIn. If nIn 4007 ** is non-negative, then it is the length of the string in bytes. Otherwise, 4008 ** the length of the string is determined using strlen(). 4009 ** 4010 ** It is the responsibility of the caller to eventually free the returned 4011 ** buffer using sqlite3_free(). If an OOM error occurs, NULL is returned. 4012 */ 4013 static char *sqlite3Fts5Strndup(int *pRc, const char *pIn, int nIn){ 4014 char *zRet = 0; 4015 if( *pRc==SQLITE_OK ){ 4016 if( nIn<0 ){ 4017 nIn = (int)strlen(pIn); 4018 } 4019 zRet = (char*)sqlite3_malloc(nIn+1); 4020 if( zRet ){ 4021 memcpy(zRet, pIn, nIn); 4022 zRet[nIn] = '\0'; 4023 }else{ 4024 *pRc = SQLITE_NOMEM; 4025 } 4026 } 4027 return zRet; 4028 } 4029 4030 4031 /* 4032 ** Return true if character 't' may be part of an FTS5 bareword, or false 4033 ** otherwise. Characters that may be part of barewords: 4034 ** 4035 ** * All non-ASCII characters, 4036 ** * The 52 upper and lower case ASCII characters, and 4037 ** * The 10 integer ASCII characters. 4038 ** * The underscore character "_" (0x5F). 4039 ** * The unicode "subsitute" character (0x1A). 4040 */ 4041 static int sqlite3Fts5IsBareword(char t){ 4042 u8 aBareword[128] = { 4043 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 .. 0x0F */ 4044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /* 0x10 .. 0x1F */ 4045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 .. 0x2F */ 4046 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30 .. 0x3F */ 4047 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40 .. 0x4F */ 4048 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50 .. 0x5F */ 4049 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60 .. 0x6F */ 4050 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70 .. 
0x7F */ 4051 }; 4052 4053 return (t & 0x80) || aBareword[(int)t]; 4054 } 4055 4056 4057 /************************************************************************* 4058 */ 4059 typedef struct Fts5TermsetEntry Fts5TermsetEntry; 4060 struct Fts5TermsetEntry { 4061 char *pTerm; 4062 int nTerm; 4063 int iIdx; /* Index (main or aPrefix[] entry) */ 4064 Fts5TermsetEntry *pNext; 4065 }; 4066 4067 struct Fts5Termset { 4068 Fts5TermsetEntry *apHash[512]; 4069 }; 4070 4071 static int sqlite3Fts5TermsetNew(Fts5Termset **pp){ 4072 int rc = SQLITE_OK; 4073 *pp = sqlite3Fts5MallocZero(&rc, sizeof(Fts5Termset)); 4074 return rc; 4075 } 4076 4077 static int sqlite3Fts5TermsetAdd( 4078 Fts5Termset *p, 4079 int iIdx, 4080 const char *pTerm, int nTerm, 4081 int *pbPresent 4082 ){ 4083 int rc = SQLITE_OK; 4084 *pbPresent = 0; 4085 if( p ){ 4086 int i; 4087 u32 hash = 13; 4088 Fts5TermsetEntry *pEntry; 4089 4090 /* Calculate a hash value for this term. This is the same hash checksum 4091 ** used by the fts5_hash.c module. This is not important for correct 4092 ** operation of the module, but is necessary to ensure that some tests 4093 ** designed to produce hash table collisions really do work. 
*/ 4094 for(i=nTerm-1; i>=0; i--){ 4095 hash = (hash << 3) ^ hash ^ pTerm[i]; 4096 } 4097 hash = (hash << 3) ^ hash ^ iIdx; 4098 hash = hash % ArraySize(p->apHash); 4099 4100 for(pEntry=p->apHash[hash]; pEntry; pEntry=pEntry->pNext){ 4101 if( pEntry->iIdx==iIdx 4102 && pEntry->nTerm==nTerm 4103 && memcmp(pEntry->pTerm, pTerm, nTerm)==0 4104 ){ 4105 *pbPresent = 1; 4106 break; 4107 } 4108 } 4109 4110 if( pEntry==0 ){ 4111 pEntry = sqlite3Fts5MallocZero(&rc, sizeof(Fts5TermsetEntry) + nTerm); 4112 if( pEntry ){ 4113 pEntry->pTerm = (char*)&pEntry[1]; 4114 pEntry->nTerm = nTerm; 4115 pEntry->iIdx = iIdx; 4116 memcpy(pEntry->pTerm, pTerm, nTerm); 4117 pEntry->pNext = p->apHash[hash]; 4118 p->apHash[hash] = pEntry; 4119 } 4120 } 4121 } 4122 4123 return rc; 4124 } 4125 4126 static void sqlite3Fts5TermsetFree(Fts5Termset *p){ 4127 if( p ){ 4128 u32 i; 4129 for(i=0; i<ArraySize(p->apHash); i++){ 4130 Fts5TermsetEntry *pEntry = p->apHash[i]; 4131 while( pEntry ){ 4132 Fts5TermsetEntry *pDel = pEntry; 4133 pEntry = pEntry->pNext; 4134 sqlite3_free(pDel); 4135 } 4136 } 4137 sqlite3_free(p); 4138 } 4139 } 4140 4141 #line 1 "fts5_config.c" 4142 /* 4143 ** 2014 Jun 09 4144 ** 4145 ** The author disclaims copyright to this source code. In place of 4146 ** a legal notice, here is a blessing: 4147 ** 4148 ** May you do good and not evil. 4149 ** May you find forgiveness for yourself and forgive others. 4150 ** May you share freely, never taking more than you give. 4151 ** 4152 ****************************************************************************** 4153 ** 4154 ** This is an SQLite module implementing full-text search. 
4155 */ 4156 4157 4158 /* #include "fts5Int.h" */ 4159 4160 #define FTS5_DEFAULT_PAGE_SIZE 4050 4161 #define FTS5_DEFAULT_AUTOMERGE 4 4162 #define FTS5_DEFAULT_USERMERGE 4 4163 #define FTS5_DEFAULT_CRISISMERGE 16 4164 #define FTS5_DEFAULT_HASHSIZE (1024*1024) 4165 4166 /* Maximum allowed page size */ 4167 #define FTS5_MAX_PAGE_SIZE (64*1024) 4168 4169 static int fts5_iswhitespace(char x){ 4170 return (x==' '); 4171 } 4172 4173 static int fts5_isopenquote(char x){ 4174 return (x=='"' || x=='\'' || x=='[' || x=='`'); 4175 } 4176 4177 /* 4178 ** Argument pIn points to a character that is part of a nul-terminated 4179 ** string. Return a pointer to the first character following *pIn in 4180 ** the string that is not a white-space character. 4181 */ 4182 static const char *fts5ConfigSkipWhitespace(const char *pIn){ 4183 const char *p = pIn; 4184 if( p ){ 4185 while( fts5_iswhitespace(*p) ){ p++; } 4186 } 4187 return p; 4188 } 4189 4190 /* 4191 ** Argument pIn points to a character that is part of a nul-terminated 4192 ** string. Return a pointer to the first character following *pIn in 4193 ** the string that is not a "bareword" character. 
4194 */ 4195 static const char *fts5ConfigSkipBareword(const char *pIn){ 4196 const char *p = pIn; 4197 while ( sqlite3Fts5IsBareword(*p) ) p++; 4198 if( p==pIn ) p = 0; 4199 return p; 4200 } 4201 4202 static int fts5_isdigit(char a){ 4203 return (a>='0' && a<='9'); 4204 } 4205 4206 4207 4208 static const char *fts5ConfigSkipLiteral(const char *pIn){ 4209 const char *p = pIn; 4210 switch( *p ){ 4211 case 'n': case 'N': 4212 if( sqlite3_strnicmp("null", p, 4)==0 ){ 4213 p = &p[4]; 4214 }else{ 4215 p = 0; 4216 } 4217 break; 4218 4219 case 'x': case 'X': 4220 p++; 4221 if( *p=='\'' ){ 4222 p++; 4223 while( (*p>='a' && *p<='f') 4224 || (*p>='A' && *p<='F') 4225 || (*p>='0' && *p<='9') 4226 ){ 4227 p++; 4228 } 4229 if( *p=='\'' && 0==((p-pIn)%2) ){ 4230 p++; 4231 }else{ 4232 p = 0; 4233 } 4234 }else{ 4235 p = 0; 4236 } 4237 break; 4238 4239 case '\'': 4240 p++; 4241 while( p ){ 4242 if( *p=='\'' ){ 4243 p++; 4244 if( *p!='\'' ) break; 4245 } 4246 p++; 4247 if( *p==0 ) p = 0; 4248 } 4249 break; 4250 4251 default: 4252 /* maybe a number */ 4253 if( *p=='+' || *p=='-' ) p++; 4254 while( fts5_isdigit(*p) ) p++; 4255 4256 /* At this point, if the literal was an integer, the parse is 4257 ** finished. Or, if it is a floating point value, it may continue 4258 ** with either a decimal point or an 'E' character. */ 4259 if( *p=='.' && fts5_isdigit(p[1]) ){ 4260 p += 2; 4261 while( fts5_isdigit(*p) ) p++; 4262 } 4263 if( p==pIn ) p = 0; 4264 4265 break; 4266 } 4267 4268 return p; 4269 } 4270 4271 /* 4272 ** The first character of the string pointed to by argument z is guaranteed 4273 ** to be an open-quote character (see function fts5_isopenquote()). 4274 ** 4275 ** This function searches for the corresponding close-quote character within 4276 ** the string and, if found, dequotes the string in place and adds a new 4277 ** nul-terminator byte. 4278 ** 4279 ** If the close-quote is found, the value returned is the byte offset of 4280 ** the character immediately following it. 
** Or, if the close-quote is not found, everything up to the end of the
** string is treated as quoted text and the value returned is the byte
** offset of the original nul-terminator.
**
** Within the quoted text, two consecutive close-quote characters stand
** for a single literal quote character. For a string opened with '[' the
** close-quote character is ']'.
*/
static int fts5Dequote(char *z){
  char cClose = z[0];             /* Close-quote character */
  int iRead = 1;                  /* Next input byte; skips the open quote */
  int iWrite = 0;                 /* Next output byte */

  assert( cClose=='[' || cClose=='\'' || cClose=='"' || cClose=='`' );
  if( cClose=='[' ) cClose = ']';

  for(;;){
    char c = z[iRead];
    if( c=='\0' ) break;

    if( c!=cClose ){
      /* Ordinary character: copy it down */
      z[iWrite++] = c;
      iRead++;
      continue;
    }

    if( z[iRead+1]!=cClose ){
      /* Character iRead was the close quote. */
      iRead++;
      break;
    }

    /* Characters iRead and iRead+1 form an escaped quote character. Emit
    ** a single quote and step the input cursor past both. */
    z[iWrite++] = cClose;
    iRead += 2;
  }

  z[iWrite] = '\0';
  return iRead;
}

/*
** Convert an SQL-style quoted string into a normal string by removing
** the quote characters.  The conversion is done in-place.  If the
** input does not begin with a quote character, then this routine
** is a no-op.
4321 ** 4322 ** Examples: 4323 ** 4324 ** "abc" becomes abc 4325 ** 'xyz' becomes xyz 4326 ** [pqr] becomes pqr 4327 ** `mno` becomes mno 4328 */ 4329 static void sqlite3Fts5Dequote(char *z){ 4330 char quote; /* Quote character (if any ) */ 4331 4332 assert( 0==fts5_iswhitespace(z[0]) ); 4333 quote = z[0]; 4334 if( quote=='[' || quote=='\'' || quote=='"' || quote=='`' ){ 4335 fts5Dequote(z); 4336 } 4337 } 4338 4339 4340 struct Fts5Enum { 4341 const char *zName; 4342 int eVal; 4343 }; 4344 typedef struct Fts5Enum Fts5Enum; 4345 4346 static int fts5ConfigSetEnum( 4347 const Fts5Enum *aEnum, 4348 const char *zEnum, 4349 int *peVal 4350 ){ 4351 int nEnum = (int)strlen(zEnum); 4352 int i; 4353 int iVal = -1; 4354 4355 for(i=0; aEnum[i].zName; i++){ 4356 if( sqlite3_strnicmp(aEnum[i].zName, zEnum, nEnum)==0 ){ 4357 if( iVal>=0 ) return SQLITE_ERROR; 4358 iVal = aEnum[i].eVal; 4359 } 4360 } 4361 4362 *peVal = iVal; 4363 return iVal<0 ? SQLITE_ERROR : SQLITE_OK; 4364 } 4365 4366 /* 4367 ** Parse a "special" CREATE VIRTUAL TABLE directive and update 4368 ** configuration object pConfig as appropriate. 4369 ** 4370 ** If successful, object pConfig is updated and SQLITE_OK returned. If 4371 ** an error occurs, an SQLite error code is returned and an error message 4372 ** may be left in *pzErr. It is the responsibility of the caller to 4373 ** eventually free any such error message using sqlite3_free(). 
4374 */ 4375 static int fts5ConfigParseSpecial( 4376 Fts5Global *pGlobal, 4377 Fts5Config *pConfig, /* Configuration object to update */ 4378 const char *zCmd, /* Special command to parse */ 4379 const char *zArg, /* Argument to parse */ 4380 char **pzErr /* OUT: Error message */ 4381 ){ 4382 int rc = SQLITE_OK; 4383 int nCmd = (int)strlen(zCmd); 4384 if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ 4385 const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; 4386 const char *p; 4387 int bFirst = 1; 4388 if( pConfig->aPrefix==0 ){ 4389 pConfig->aPrefix = sqlite3Fts5MallocZero(&rc, nByte); 4390 if( rc ) return rc; 4391 } 4392 4393 p = zArg; 4394 while( 1 ){ 4395 int nPre = 0; 4396 4397 while( p[0]==' ' ) p++; 4398 if( bFirst==0 && p[0]==',' ){ 4399 p++; 4400 while( p[0]==' ' ) p++; 4401 }else if( p[0]=='\0' ){ 4402 break; 4403 } 4404 if( p[0]<'0' || p[0]>'9' ){ 4405 *pzErr = sqlite3_mprintf("malformed prefix=... directive"); 4406 rc = SQLITE_ERROR; 4407 break; 4408 } 4409 4410 if( pConfig->nPrefix==FTS5_MAX_PREFIX_INDEXES ){ 4411 *pzErr = sqlite3_mprintf( 4412 "too many prefix indexes (max %d)", FTS5_MAX_PREFIX_INDEXES 4413 ); 4414 rc = SQLITE_ERROR; 4415 break; 4416 } 4417 4418 while( p[0]>='0' && p[0]<='9' && nPre<1000 ){ 4419 nPre = nPre*10 + (p[0] - '0'); 4420 p++; 4421 } 4422 4423 if( nPre<=0 || nPre>=1000 ){ 4424 *pzErr = sqlite3_mprintf("prefix length out of range (max 999)"); 4425 rc = SQLITE_ERROR; 4426 break; 4427 } 4428 4429 pConfig->aPrefix[pConfig->nPrefix] = nPre; 4430 pConfig->nPrefix++; 4431 bFirst = 0; 4432 } 4433 assert( pConfig->nPrefix<=FTS5_MAX_PREFIX_INDEXES ); 4434 return rc; 4435 } 4436 4437 if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){ 4438 const char *p = (const char*)zArg; 4439 sqlite3_int64 nArg = strlen(zArg) + 1; 4440 char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); 4441 char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2); 4442 char *pSpace = pDel; 4443 4444 if( azArg && pSpace ){ 4445 if( pConfig->pTok ){ 4446 
*pzErr = sqlite3_mprintf("multiple tokenize=... directives"); 4447 rc = SQLITE_ERROR; 4448 }else{ 4449 for(nArg=0; p && *p; nArg++){ 4450 const char *p2 = fts5ConfigSkipWhitespace(p); 4451 if( *p2=='\'' ){ 4452 p = fts5ConfigSkipLiteral(p2); 4453 }else{ 4454 p = fts5ConfigSkipBareword(p2); 4455 } 4456 if( p ){ 4457 memcpy(pSpace, p2, p-p2); 4458 azArg[nArg] = pSpace; 4459 sqlite3Fts5Dequote(pSpace); 4460 pSpace += (p - p2) + 1; 4461 p = fts5ConfigSkipWhitespace(p); 4462 } 4463 } 4464 if( p==0 ){ 4465 *pzErr = sqlite3_mprintf("parse error in tokenize directive"); 4466 rc = SQLITE_ERROR; 4467 }else{ 4468 rc = sqlite3Fts5GetTokenizer(pGlobal, 4469 (const char**)azArg, (int)nArg, pConfig, 4470 pzErr 4471 ); 4472 } 4473 } 4474 } 4475 4476 sqlite3_free(azArg); 4477 sqlite3_free(pDel); 4478 return rc; 4479 } 4480 4481 if( sqlite3_strnicmp("content", zCmd, nCmd)==0 ){ 4482 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ 4483 *pzErr = sqlite3_mprintf("multiple content=... directives"); 4484 rc = SQLITE_ERROR; 4485 }else{ 4486 if( zArg[0] ){ 4487 pConfig->eContent = FTS5_CONTENT_EXTERNAL; 4488 pConfig->zContent = sqlite3Fts5Mprintf(&rc, "%Q.%Q", pConfig->zDb,zArg); 4489 }else{ 4490 pConfig->eContent = FTS5_CONTENT_NONE; 4491 } 4492 } 4493 return rc; 4494 } 4495 4496 if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ 4497 if( pConfig->zContentRowid ){ 4498 *pzErr = sqlite3_mprintf("multiple content_rowid=... directives"); 4499 rc = SQLITE_ERROR; 4500 }else{ 4501 pConfig->zContentRowid = sqlite3Fts5Strndup(&rc, zArg, -1); 4502 } 4503 return rc; 4504 } 4505 4506 if( sqlite3_strnicmp("columnsize", zCmd, nCmd)==0 ){ 4507 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ 4508 *pzErr = sqlite3_mprintf("malformed columnsize=... 
directive"); 4509 rc = SQLITE_ERROR; 4510 }else{ 4511 pConfig->bColumnsize = (zArg[0]=='1'); 4512 } 4513 return rc; 4514 } 4515 4516 if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){ 4517 const Fts5Enum aDetail[] = { 4518 { "none", FTS5_DETAIL_NONE }, 4519 { "full", FTS5_DETAIL_FULL }, 4520 { "columns", FTS5_DETAIL_COLUMNS }, 4521 { 0, 0 } 4522 }; 4523 4524 if( (rc = fts5ConfigSetEnum(aDetail, zArg, &pConfig->eDetail)) ){ 4525 *pzErr = sqlite3_mprintf("malformed detail=... directive"); 4526 } 4527 return rc; 4528 } 4529 4530 *pzErr = sqlite3_mprintf("unrecognized option: \"%.*s\"", nCmd, zCmd); 4531 return SQLITE_ERROR; 4532 } 4533 4534 /* 4535 ** Allocate an instance of the default tokenizer ("simple") at 4536 ** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error 4537 ** code if an error occurs. 4538 */ 4539 static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ 4540 assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); 4541 return sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0); 4542 } 4543 4544 /* 4545 ** Gobble up the first bareword or quoted word from the input buffer zIn. 4546 ** Return a pointer to the character immediately following the last in 4547 ** the gobbled word if successful, or a NULL pointer otherwise (failed 4548 ** to find close-quote character). 4549 ** 4550 ** Before returning, set pzOut to point to a new buffer containing a 4551 ** nul-terminated, dequoted copy of the gobbled word. If the word was 4552 ** quoted, *pbQuoted is also set to 1 before returning. 4553 ** 4554 ** If *pRc is other than SQLITE_OK when this function is called, it is 4555 ** a no-op (NULL is returned). Otherwise, if an OOM occurs within this 4556 ** function, *pRc is set to SQLITE_NOMEM before returning. *pRc is *not* 4557 ** set if a parse error (failed to find close quote) occurs. 
4558 */ 4559 static const char *fts5ConfigGobbleWord( 4560 int *pRc, /* IN/OUT: Error code */ 4561 const char *zIn, /* Buffer to gobble string/bareword from */ 4562 char **pzOut, /* OUT: malloc'd buffer containing str/bw */ 4563 int *pbQuoted /* OUT: Set to true if dequoting required */ 4564 ){ 4565 const char *zRet = 0; 4566 4567 sqlite3_int64 nIn = strlen(zIn); 4568 char *zOut = sqlite3_malloc64(nIn+1); 4569 4570 assert( *pRc==SQLITE_OK ); 4571 *pbQuoted = 0; 4572 *pzOut = 0; 4573 4574 if( zOut==0 ){ 4575 *pRc = SQLITE_NOMEM; 4576 }else{ 4577 memcpy(zOut, zIn, (size_t)(nIn+1)); 4578 if( fts5_isopenquote(zOut[0]) ){ 4579 int ii = fts5Dequote(zOut); 4580 zRet = &zIn[ii]; 4581 *pbQuoted = 1; 4582 }else{ 4583 zRet = fts5ConfigSkipBareword(zIn); 4584 if( zRet ){ 4585 zOut[zRet-zIn] = '\0'; 4586 } 4587 } 4588 } 4589 4590 if( zRet==0 ){ 4591 sqlite3_free(zOut); 4592 }else{ 4593 *pzOut = zOut; 4594 } 4595 4596 return zRet; 4597 } 4598 4599 static int fts5ConfigParseColumn( 4600 Fts5Config *p, 4601 char *zCol, 4602 char *zArg, 4603 char **pzErr 4604 ){ 4605 int rc = SQLITE_OK; 4606 if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) 4607 || 0==sqlite3_stricmp(zCol, FTS5_ROWID_NAME) 4608 ){ 4609 *pzErr = sqlite3_mprintf("reserved fts5 column name: %s", zCol); 4610 rc = SQLITE_ERROR; 4611 }else if( zArg ){ 4612 if( 0==sqlite3_stricmp(zArg, "unindexed") ){ 4613 p->abUnindexed[p->nCol] = 1; 4614 }else{ 4615 *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); 4616 rc = SQLITE_ERROR; 4617 } 4618 } 4619 4620 p->azCol[p->nCol++] = zCol; 4621 return rc; 4622 } 4623 4624 /* 4625 ** Populate the Fts5Config.zContentExprlist string. 
4626 */ 4627 static int fts5ConfigMakeExprlist(Fts5Config *p){ 4628 int i; 4629 int rc = SQLITE_OK; 4630 Fts5Buffer buf = {0, 0, 0}; 4631 4632 sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); 4633 if( p->eContent!=FTS5_CONTENT_NONE ){ 4634 for(i=0; i<p->nCol; i++){ 4635 if( p->eContent==FTS5_CONTENT_EXTERNAL ){ 4636 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); 4637 }else{ 4638 sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); 4639 } 4640 } 4641 } 4642 4643 assert( p->zContentExprlist==0 ); 4644 p->zContentExprlist = (char*)buf.p; 4645 return rc; 4646 } 4647 4648 /* 4649 ** Arguments nArg/azArg contain the string arguments passed to the xCreate 4650 ** or xConnect method of the virtual table. This function attempts to 4651 ** allocate an instance of Fts5Config containing the results of parsing 4652 ** those arguments. 4653 ** 4654 ** If successful, SQLITE_OK is returned and *ppOut is set to point to the 4655 ** new Fts5Config object. If an error occurs, an SQLite error code is 4656 ** returned, *ppOut is set to NULL and an error message may be left in 4657 ** *pzErr. It is the responsibility of the caller to eventually free any 4658 ** such error message using sqlite3_free(). 
4659 */ 4660 static int sqlite3Fts5ConfigParse( 4661 Fts5Global *pGlobal, 4662 sqlite3 *db, 4663 int nArg, /* Number of arguments */ 4664 const char **azArg, /* Array of nArg CREATE VIRTUAL TABLE args */ 4665 Fts5Config **ppOut, /* OUT: Results of parse */ 4666 char **pzErr /* OUT: Error message */ 4667 ){ 4668 int rc = SQLITE_OK; /* Return code */ 4669 Fts5Config *pRet; /* New object to return */ 4670 int i; 4671 sqlite3_int64 nByte; 4672 4673 *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); 4674 if( pRet==0 ) return SQLITE_NOMEM; 4675 memset(pRet, 0, sizeof(Fts5Config)); 4676 pRet->db = db; 4677 pRet->iCookie = -1; 4678 4679 nByte = nArg * (sizeof(char*) + sizeof(u8)); 4680 pRet->azCol = (char**)sqlite3Fts5MallocZero(&rc, nByte); 4681 pRet->abUnindexed = pRet->azCol ? (u8*)&pRet->azCol[nArg] : 0; 4682 pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); 4683 pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); 4684 pRet->bColumnsize = 1; 4685 pRet->eDetail = FTS5_DETAIL_FULL; 4686 #ifdef SQLITE_DEBUG 4687 pRet->bPrefixIndex = 1; 4688 #endif 4689 if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ 4690 *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); 4691 rc = SQLITE_ERROR; 4692 } 4693 4694 for(i=3; rc==SQLITE_OK && i<nArg; i++){ 4695 const char *zOrig = azArg[i]; 4696 const char *z; 4697 char *zOne = 0; 4698 char *zTwo = 0; 4699 int bOption = 0; 4700 int bMustBeCol = 0; 4701 4702 z = fts5ConfigGobbleWord(&rc, zOrig, &zOne, &bMustBeCol); 4703 z = fts5ConfigSkipWhitespace(z); 4704 if( z && *z=='=' ){ 4705 bOption = 1; 4706 assert( zOne!=0 ); 4707 z++; 4708 if( bMustBeCol ) z = 0; 4709 } 4710 z = fts5ConfigSkipWhitespace(z); 4711 if( z && z[0] ){ 4712 int bDummy; 4713 z = fts5ConfigGobbleWord(&rc, z, &zTwo, &bDummy); 4714 if( z && z[0] ) z = 0; 4715 } 4716 4717 if( rc==SQLITE_OK ){ 4718 if( z==0 ){ 4719 *pzErr = sqlite3_mprintf("parse error in \"%s\"", zOrig); 4720 rc = SQLITE_ERROR; 4721 }else{ 4722 if( 
bOption ){ 4723 rc = fts5ConfigParseSpecial(pGlobal, pRet, 4724 ALWAYS(zOne)?zOne:"", 4725 zTwo?zTwo:"", 4726 pzErr 4727 ); 4728 }else{ 4729 rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); 4730 zOne = 0; 4731 } 4732 } 4733 } 4734 4735 sqlite3_free(zOne); 4736 sqlite3_free(zTwo); 4737 } 4738 4739 /* If a tokenizer= option was successfully parsed, the tokenizer has 4740 ** already been allocated. Otherwise, allocate an instance of the default 4741 ** tokenizer (unicode61) now. */ 4742 if( rc==SQLITE_OK && pRet->pTok==0 ){ 4743 rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); 4744 } 4745 4746 /* If no zContent option was specified, fill in the default values. */ 4747 if( rc==SQLITE_OK && pRet->zContent==0 ){ 4748 const char *zTail = 0; 4749 assert( pRet->eContent==FTS5_CONTENT_NORMAL 4750 || pRet->eContent==FTS5_CONTENT_NONE 4751 ); 4752 if( pRet->eContent==FTS5_CONTENT_NORMAL ){ 4753 zTail = "content"; 4754 }else if( pRet->bColumnsize ){ 4755 zTail = "docsize"; 4756 } 4757 4758 if( zTail ){ 4759 pRet->zContent = sqlite3Fts5Mprintf( 4760 &rc, "%Q.'%q_%s'", pRet->zDb, pRet->zName, zTail 4761 ); 4762 } 4763 } 4764 4765 if( rc==SQLITE_OK && pRet->zContentRowid==0 ){ 4766 pRet->zContentRowid = sqlite3Fts5Strndup(&rc, "rowid", -1); 4767 } 4768 4769 /* Formulate the zContentExprlist text */ 4770 if( rc==SQLITE_OK ){ 4771 rc = fts5ConfigMakeExprlist(pRet); 4772 } 4773 4774 if( rc!=SQLITE_OK ){ 4775 sqlite3Fts5ConfigFree(pRet); 4776 *ppOut = 0; 4777 } 4778 return rc; 4779 } 4780 4781 /* 4782 ** Free the configuration object passed as the only argument. 
4783 */ 4784 static void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ 4785 if( pConfig ){ 4786 int i; 4787 if( pConfig->pTok ){ 4788 pConfig->pTokApi->xDelete(pConfig->pTok); 4789 } 4790 sqlite3_free(pConfig->zDb); 4791 sqlite3_free(pConfig->zName); 4792 for(i=0; i<pConfig->nCol; i++){ 4793 sqlite3_free(pConfig->azCol[i]); 4794 } 4795 sqlite3_free(pConfig->azCol); 4796 sqlite3_free(pConfig->aPrefix); 4797 sqlite3_free(pConfig->zRank); 4798 sqlite3_free(pConfig->zRankArgs); 4799 sqlite3_free(pConfig->zContent); 4800 sqlite3_free(pConfig->zContentRowid); 4801 sqlite3_free(pConfig->zContentExprlist); 4802 sqlite3_free(pConfig); 4803 } 4804 } 4805 4806 /* 4807 ** Call sqlite3_declare_vtab() based on the contents of the configuration 4808 ** object passed as the only argument. Return SQLITE_OK if successful, or 4809 ** an SQLite error code if an error occurs. 4810 */ 4811 static int sqlite3Fts5ConfigDeclareVtab(Fts5Config *pConfig){ 4812 int i; 4813 int rc = SQLITE_OK; 4814 char *zSql; 4815 4816 zSql = sqlite3Fts5Mprintf(&rc, "CREATE TABLE x("); 4817 for(i=0; zSql && i<pConfig->nCol; i++){ 4818 const char *zSep = (i==0?"":", "); 4819 zSql = sqlite3Fts5Mprintf(&rc, "%z%s%Q", zSql, zSep, pConfig->azCol[i]); 4820 } 4821 zSql = sqlite3Fts5Mprintf(&rc, "%z, %Q HIDDEN, %s HIDDEN)", 4822 zSql, pConfig->zName, FTS5_RANK_NAME 4823 ); 4824 4825 assert( zSql || rc==SQLITE_NOMEM ); 4826 if( zSql ){ 4827 rc = sqlite3_declare_vtab(pConfig->db, zSql); 4828 sqlite3_free(zSql); 4829 } 4830 4831 return rc; 4832 } 4833 4834 /* 4835 ** Tokenize the text passed via the second and third arguments. 4836 ** 4837 ** The callback is invoked once for each token in the input text. 
The 4838 ** arguments passed to it are, in order: 4839 ** 4840 ** void *pCtx // Copy of 4th argument to sqlite3Fts5Tokenize() 4841 ** const char *pToken // Pointer to buffer containing token 4842 ** int nToken // Size of token in bytes 4843 ** int iStart // Byte offset of start of token within input text 4844 ** int iEnd // Byte offset of end of token within input text 4845 ** int iPos // Position of token in input (first token is 0) 4846 ** 4847 ** If the callback returns a non-zero value the tokenization is abandoned 4848 ** and no further callbacks are issued. 4849 ** 4850 ** This function returns SQLITE_OK if successful or an SQLite error code 4851 ** if an error occurs. If the tokenization was abandoned early because 4852 ** the callback returned SQLITE_DONE, this is not an error and this function 4853 ** still returns SQLITE_OK. Or, if the tokenization was abandoned early 4854 ** because the callback returned another non-zero value, it is assumed 4855 ** to be an SQLite error code and returned to the caller. 4856 */ 4857 static int sqlite3Fts5Tokenize( 4858 Fts5Config *pConfig, /* FTS5 Configuration object */ 4859 int flags, /* FTS5_TOKENIZE_* flags */ 4860 const char *pText, int nText, /* Text to tokenize */ 4861 void *pCtx, /* Context passed to xToken() */ 4862 int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ 4863 ){ 4864 if( pText==0 ) return SQLITE_OK; 4865 return pConfig->pTokApi->xTokenize( 4866 pConfig->pTok, pCtx, flags, pText, nText, xToken 4867 ); 4868 } 4869 4870 /* 4871 ** Argument pIn points to the first character in what is expected to be 4872 ** a comma-separated list of SQL literals followed by a ')' character. 4873 ** If it actually is this, return a pointer to the ')'. Otherwise, return 4874 ** NULL to indicate a parse error. 
4875 */ 4876 static const char *fts5ConfigSkipArgs(const char *pIn){ 4877 const char *p = pIn; 4878 4879 while( 1 ){ 4880 p = fts5ConfigSkipWhitespace(p); 4881 p = fts5ConfigSkipLiteral(p); 4882 p = fts5ConfigSkipWhitespace(p); 4883 if( p==0 || *p==')' ) break; 4884 if( *p!=',' ){ 4885 p = 0; 4886 break; 4887 } 4888 p++; 4889 } 4890 4891 return p; 4892 } 4893 4894 /* 4895 ** Parameter zIn contains a rank() function specification. The format of 4896 ** this is: 4897 ** 4898 ** + Bareword (function name) 4899 ** + Open parenthesis - "(" 4900 ** + Zero or more SQL literals in a comma separated list 4901 ** + Close parenthesis - ")" 4902 */ 4903 static int sqlite3Fts5ConfigParseRank( 4904 const char *zIn, /* Input string */ 4905 char **pzRank, /* OUT: Rank function name */ 4906 char **pzRankArgs /* OUT: Rank function arguments */ 4907 ){ 4908 const char *p = zIn; 4909 const char *pRank; 4910 char *zRank = 0; 4911 char *zRankArgs = 0; 4912 int rc = SQLITE_OK; 4913 4914 *pzRank = 0; 4915 *pzRankArgs = 0; 4916 4917 if( p==0 ){ 4918 rc = SQLITE_ERROR; 4919 }else{ 4920 p = fts5ConfigSkipWhitespace(p); 4921 pRank = p; 4922 p = fts5ConfigSkipBareword(p); 4923 4924 if( p ){ 4925 zRank = sqlite3Fts5MallocZero(&rc, 1 + p - pRank); 4926 if( zRank ) memcpy(zRank, pRank, p-pRank); 4927 }else{ 4928 rc = SQLITE_ERROR; 4929 } 4930 4931 if( rc==SQLITE_OK ){ 4932 p = fts5ConfigSkipWhitespace(p); 4933 if( *p!='(' ) rc = SQLITE_ERROR; 4934 p++; 4935 } 4936 if( rc==SQLITE_OK ){ 4937 const char *pArgs; 4938 p = fts5ConfigSkipWhitespace(p); 4939 pArgs = p; 4940 if( *p!=')' ){ 4941 p = fts5ConfigSkipArgs(p); 4942 if( p==0 ){ 4943 rc = SQLITE_ERROR; 4944 }else{ 4945 zRankArgs = sqlite3Fts5MallocZero(&rc, 1 + p - pArgs); 4946 if( zRankArgs ) memcpy(zRankArgs, pArgs, p-pArgs); 4947 } 4948 } 4949 } 4950 } 4951 4952 if( rc!=SQLITE_OK ){ 4953 sqlite3_free(zRank); 4954 assert( zRankArgs==0 ); 4955 }else{ 4956 *pzRank = zRank; 4957 *pzRankArgs = zRankArgs; 4958 } 4959 return rc; 4960 } 4961 4962 
static int sqlite3Fts5ConfigSetValue( 4963 Fts5Config *pConfig, 4964 const char *zKey, 4965 sqlite3_value *pVal, 4966 int *pbBadkey 4967 ){ 4968 int rc = SQLITE_OK; 4969 4970 if( 0==sqlite3_stricmp(zKey, "pgsz") ){ 4971 int pgsz = 0; 4972 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ 4973 pgsz = sqlite3_value_int(pVal); 4974 } 4975 if( pgsz<32 || pgsz>FTS5_MAX_PAGE_SIZE ){ 4976 *pbBadkey = 1; 4977 }else{ 4978 pConfig->pgsz = pgsz; 4979 } 4980 } 4981 4982 else if( 0==sqlite3_stricmp(zKey, "hashsize") ){ 4983 int nHashSize = -1; 4984 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ 4985 nHashSize = sqlite3_value_int(pVal); 4986 } 4987 if( nHashSize<=0 ){ 4988 *pbBadkey = 1; 4989 }else{ 4990 pConfig->nHashSize = nHashSize; 4991 } 4992 } 4993 4994 else if( 0==sqlite3_stricmp(zKey, "automerge") ){ 4995 int nAutomerge = -1; 4996 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ 4997 nAutomerge = sqlite3_value_int(pVal); 4998 } 4999 if( nAutomerge<0 || nAutomerge>64 ){ 5000 *pbBadkey = 1; 5001 }else{ 5002 if( nAutomerge==1 ) nAutomerge = FTS5_DEFAULT_AUTOMERGE; 5003 pConfig->nAutomerge = nAutomerge; 5004 } 5005 } 5006 5007 else if( 0==sqlite3_stricmp(zKey, "usermerge") ){ 5008 int nUsermerge = -1; 5009 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ 5010 nUsermerge = sqlite3_value_int(pVal); 5011 } 5012 if( nUsermerge<2 || nUsermerge>16 ){ 5013 *pbBadkey = 1; 5014 }else{ 5015 pConfig->nUsermerge = nUsermerge; 5016 } 5017 } 5018 5019 else if( 0==sqlite3_stricmp(zKey, "crisismerge") ){ 5020 int nCrisisMerge = -1; 5021 if( SQLITE_INTEGER==sqlite3_value_numeric_type(pVal) ){ 5022 nCrisisMerge = sqlite3_value_int(pVal); 5023 } 5024 if( nCrisisMerge<0 ){ 5025 *pbBadkey = 1; 5026 }else{ 5027 if( nCrisisMerge<=1 ) nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; 5028 if( nCrisisMerge>=FTS5_MAX_SEGMENT ) nCrisisMerge = FTS5_MAX_SEGMENT-1; 5029 pConfig->nCrisisMerge = nCrisisMerge; 5030 } 5031 } 5032 5033 else if( 0==sqlite3_stricmp(zKey, "rank") ){ 5034 
const char *zIn = (const char*)sqlite3_value_text(pVal); 5035 char *zRank; 5036 char *zRankArgs; 5037 rc = sqlite3Fts5ConfigParseRank(zIn, &zRank, &zRankArgs); 5038 if( rc==SQLITE_OK ){ 5039 sqlite3_free(pConfig->zRank); 5040 sqlite3_free(pConfig->zRankArgs); 5041 pConfig->zRank = zRank; 5042 pConfig->zRankArgs = zRankArgs; 5043 }else if( rc==SQLITE_ERROR ){ 5044 rc = SQLITE_OK; 5045 *pbBadkey = 1; 5046 } 5047 }else{ 5048 *pbBadkey = 1; 5049 } 5050 return rc; 5051 } 5052 5053 /* 5054 ** Load the contents of the %_config table into memory. 5055 */ 5056 static int sqlite3Fts5ConfigLoad(Fts5Config *pConfig, int iCookie){ 5057 const char *zSelect = "SELECT k, v FROM %Q.'%q_config'"; 5058 char *zSql; 5059 sqlite3_stmt *p = 0; 5060 int rc = SQLITE_OK; 5061 int iVersion = 0; 5062 5063 /* Set default values */ 5064 pConfig->pgsz = FTS5_DEFAULT_PAGE_SIZE; 5065 pConfig->nAutomerge = FTS5_DEFAULT_AUTOMERGE; 5066 pConfig->nUsermerge = FTS5_DEFAULT_USERMERGE; 5067 pConfig->nCrisisMerge = FTS5_DEFAULT_CRISISMERGE; 5068 pConfig->nHashSize = FTS5_DEFAULT_HASHSIZE; 5069 5070 zSql = sqlite3Fts5Mprintf(&rc, zSelect, pConfig->zDb, pConfig->zName); 5071 if( zSql ){ 5072 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p, 0); 5073 sqlite3_free(zSql); 5074 } 5075 5076 assert( rc==SQLITE_OK || p==0 ); 5077 if( rc==SQLITE_OK ){ 5078 while( SQLITE_ROW==sqlite3_step(p) ){ 5079 const char *zK = (const char*)sqlite3_column_text(p, 0); 5080 sqlite3_value *pVal = sqlite3_column_value(p, 1); 5081 if( 0==sqlite3_stricmp(zK, "version") ){ 5082 iVersion = sqlite3_value_int(pVal); 5083 }else{ 5084 int bDummy = 0; 5085 sqlite3Fts5ConfigSetValue(pConfig, zK, pVal, &bDummy); 5086 } 5087 } 5088 rc = sqlite3_finalize(p); 5089 } 5090 5091 if( rc==SQLITE_OK && iVersion!=FTS5_CURRENT_VERSION ){ 5092 rc = SQLITE_ERROR; 5093 if( pConfig->pzErrmsg ){ 5094 assert( 0==*pConfig->pzErrmsg ); 5095 *pConfig->pzErrmsg = sqlite3_mprintf( 5096 "invalid fts5 file format (found %d, expected %d) - run 'rebuild'", 5097 
iVersion, FTS5_CURRENT_VERSION 5098 ); 5099 } 5100 } 5101 5102 if( rc==SQLITE_OK ){ 5103 pConfig->iCookie = iCookie; 5104 } 5105 return rc; 5106 } 5107 5108 #line 1 "fts5_expr.c" 5109 /* 5110 ** 2014 May 31 5111 ** 5112 ** The author disclaims copyright to this source code. In place of 5113 ** a legal notice, here is a blessing: 5114 ** 5115 ** May you do good and not evil. 5116 ** May you find forgiveness for yourself and forgive others. 5117 ** May you share freely, never taking more than you give. 5118 ** 5119 ****************************************************************************** 5120 ** 5121 */ 5122 5123 5124 5125 /* #include "fts5Int.h" */ 5126 /* #include "fts5parse.h" */ 5127 5128 /* 5129 ** All token types in the generated fts5parse.h file are greater than 0. 5130 */ 5131 #define FTS5_EOF 0 5132 5133 #define FTS5_LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) 5134 5135 typedef struct Fts5ExprTerm Fts5ExprTerm; 5136 5137 /* 5138 ** Functions generated by lemon from fts5parse.y. 5139 */ 5140 static void *sqlite3Fts5ParserAlloc(void *(*mallocProc)(u64)); 5141 static void sqlite3Fts5ParserFree(void*, void (*freeProc)(void*)); 5142 static void sqlite3Fts5Parser(void*, int, Fts5Token, Fts5Parse*); 5143 #ifndef NDEBUG 5144 #include <stdio.h> 5145 static void sqlite3Fts5ParserTrace(FILE*, char*); 5146 #endif 5147 static int sqlite3Fts5ParserFallback(int); 5148 5149 5150 struct Fts5Expr { 5151 Fts5Index *pIndex; 5152 Fts5Config *pConfig; 5153 Fts5ExprNode *pRoot; 5154 int bDesc; /* Iterate in descending rowid order */ 5155 int nPhrase; /* Number of phrases in expression */ 5156 Fts5ExprPhrase **apExprPhrase; /* Pointers to phrase objects */ 5157 }; 5158 5159 /* 5160 ** eType: 5161 ** Expression node type. 
Always one of: 5162 ** 5163 ** FTS5_AND (nChild, apChild valid) 5164 ** FTS5_OR (nChild, apChild valid) 5165 ** FTS5_NOT (nChild, apChild valid) 5166 ** FTS5_STRING (pNear valid) 5167 ** FTS5_TERM (pNear valid) 5168 */ 5169 struct Fts5ExprNode { 5170 int eType; /* Node type */ 5171 int bEof; /* True at EOF */ 5172 int bNomatch; /* True if entry is not a match */ 5173 5174 /* Next method for this node. */ 5175 int (*xNext)(Fts5Expr*, Fts5ExprNode*, int, i64); 5176 5177 i64 iRowid; /* Current rowid */ 5178 Fts5ExprNearset *pNear; /* For FTS5_STRING - cluster of phrases */ 5179 5180 /* Child nodes. For a NOT node, this array always contains 2 entries. For 5181 ** AND or OR nodes, it contains 2 or more entries. */ 5182 int nChild; /* Number of child nodes */ 5183 Fts5ExprNode *apChild[1]; /* Array of child nodes */ 5184 }; 5185 5186 #define Fts5NodeIsString(p) ((p)->eType==FTS5_TERM || (p)->eType==FTS5_STRING) 5187 5188 /* 5189 ** Invoke the xNext method of an Fts5ExprNode object. This macro should be 5190 ** used as if it has the same signature as the xNext() methods themselves. 5191 */ 5192 #define fts5ExprNodeNext(a,b,c,d) (b)->xNext((a), (b), (c), (d)) 5193 5194 /* 5195 ** An instance of the following structure represents a single search term 5196 ** or term prefix. 5197 */ 5198 struct Fts5ExprTerm { 5199 u8 bPrefix; /* True for a prefix term */ 5200 u8 bFirst; /* True if token must be first in column */ 5201 char *zTerm; /* nul-terminated term */ 5202 Fts5IndexIter *pIter; /* Iterator for this term */ 5203 Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ 5204 }; 5205 5206 /* 5207 ** A phrase. One or more terms that must appear in a contiguous sequence 5208 ** within a document for it to match. 
5209 */ 5210 struct Fts5ExprPhrase { 5211 Fts5ExprNode *pNode; /* FTS5_STRING node this phrase is part of */ 5212 Fts5Buffer poslist; /* Current position list */ 5213 int nTerm; /* Number of entries in aTerm[] */ 5214 Fts5ExprTerm aTerm[1]; /* Terms that make up this phrase */ 5215 }; 5216 5217 /* 5218 ** One or more phrases that must appear within a certain token distance of 5219 ** each other within each matching document. 5220 */ 5221 struct Fts5ExprNearset { 5222 int nNear; /* NEAR parameter */ 5223 Fts5Colset *pColset; /* Columns to search (NULL -> all columns) */ 5224 int nPhrase; /* Number of entries in aPhrase[] array */ 5225 Fts5ExprPhrase *apPhrase[1]; /* Array of phrase pointers */ 5226 }; 5227 5228 5229 /* 5230 ** Parse context. 5231 */ 5232 struct Fts5Parse { 5233 Fts5Config *pConfig; 5234 char *zErr; 5235 int rc; 5236 int nPhrase; /* Size of apPhrase array */ 5237 Fts5ExprPhrase **apPhrase; /* Array of all phrases */ 5238 Fts5ExprNode *pExpr; /* Result of a successful parse */ 5239 int bPhraseToAnd; /* Convert "a+b" to "a AND b" */ 5240 }; 5241 5242 static void sqlite3Fts5ParseError(Fts5Parse *pParse, const char *zFmt, ...){ 5243 va_list ap; 5244 va_start(ap, zFmt); 5245 if( pParse->rc==SQLITE_OK ){ 5246 assert( pParse->zErr==0 ); 5247 pParse->zErr = sqlite3_vmprintf(zFmt, ap); 5248 pParse->rc = SQLITE_ERROR; 5249 } 5250 va_end(ap); 5251 } 5252 5253 static int fts5ExprIsspace(char t){ 5254 return t==' ' || t=='\t' || t=='\n' || t=='\r'; 5255 } 5256 5257 /* 5258 ** Read the first token from the nul-terminated string at *pz. 
5259 */ 5260 static int fts5ExprGetToken( 5261 Fts5Parse *pParse, 5262 const char **pz, /* IN/OUT: Pointer into buffer */ 5263 Fts5Token *pToken 5264 ){ 5265 const char *z = *pz; 5266 int tok; 5267 5268 /* Skip past any whitespace */ 5269 while( fts5ExprIsspace(*z) ) z++; 5270 5271 pToken->p = z; 5272 pToken->n = 1; 5273 switch( *z ){ 5274 case '(': tok = FTS5_LP; break; 5275 case ')': tok = FTS5_RP; break; 5276 case '{': tok = FTS5_LCP; break; 5277 case '}': tok = FTS5_RCP; break; 5278 case ':': tok = FTS5_COLON; break; 5279 case ',': tok = FTS5_COMMA; break; 5280 case '+': tok = FTS5_PLUS; break; 5281 case '*': tok = FTS5_STAR; break; 5282 case '-': tok = FTS5_MINUS; break; 5283 case '^': tok = FTS5_CARET; break; 5284 case '\0': tok = FTS5_EOF; break; 5285 5286 case '"': { 5287 const char *z2; 5288 tok = FTS5_STRING; 5289 5290 for(z2=&z[1]; 1; z2++){ 5291 if( z2[0]=='"' ){ 5292 z2++; 5293 if( z2[0]!='"' ) break; 5294 } 5295 if( z2[0]=='\0' ){ 5296 sqlite3Fts5ParseError(pParse, "unterminated string"); 5297 return FTS5_EOF; 5298 } 5299 } 5300 pToken->n = (z2 - z); 5301 break; 5302 } 5303 5304 default: { 5305 const char *z2; 5306 if( sqlite3Fts5IsBareword(z[0])==0 ){ 5307 sqlite3Fts5ParseError(pParse, "fts5: syntax error near \"%.1s\"", z); 5308 return FTS5_EOF; 5309 } 5310 tok = FTS5_STRING; 5311 for(z2=&z[1]; sqlite3Fts5IsBareword(*z2); z2++); 5312 pToken->n = (z2 - z); 5313 if( pToken->n==2 && memcmp(pToken->p, "OR", 2)==0 ) tok = FTS5_OR; 5314 if( pToken->n==3 && memcmp(pToken->p, "NOT", 3)==0 ) tok = FTS5_NOT; 5315 if( pToken->n==3 && memcmp(pToken->p, "AND", 3)==0 ) tok = FTS5_AND; 5316 break; 5317 } 5318 } 5319 5320 *pz = &pToken->p[pToken->n]; 5321 return tok; 5322 } 5323 5324 static void *fts5ParseAlloc(u64 t){ return sqlite3_malloc64((sqlite3_int64)t);} 5325 static void fts5ParseFree(void *p){ sqlite3_free(p); } 5326 5327 static int sqlite3Fts5ExprNew( 5328 Fts5Config *pConfig, /* FTS5 Configuration */ 5329 int bPhraseToAnd, 5330 int iCol, 5331 const char 
*zExpr, /* Expression text */ 5332 Fts5Expr **ppNew, 5333 char **pzErr 5334 ){ 5335 Fts5Parse sParse; 5336 Fts5Token token; 5337 const char *z = zExpr; 5338 int t; /* Next token type */ 5339 void *pEngine; 5340 Fts5Expr *pNew; 5341 5342 *ppNew = 0; 5343 *pzErr = 0; 5344 memset(&sParse, 0, sizeof(sParse)); 5345 sParse.bPhraseToAnd = bPhraseToAnd; 5346 pEngine = sqlite3Fts5ParserAlloc(fts5ParseAlloc); 5347 if( pEngine==0 ){ return SQLITE_NOMEM; } 5348 sParse.pConfig = pConfig; 5349 5350 do { 5351 t = fts5ExprGetToken(&sParse, &z, &token); 5352 sqlite3Fts5Parser(pEngine, t, token, &sParse); 5353 }while( sParse.rc==SQLITE_OK && t!=FTS5_EOF ); 5354 sqlite3Fts5ParserFree(pEngine, fts5ParseFree); 5355 5356 /* If the LHS of the MATCH expression was a user column, apply the 5357 ** implicit column-filter. */ 5358 if( iCol<pConfig->nCol && sParse.pExpr && sParse.rc==SQLITE_OK ){ 5359 int n = sizeof(Fts5Colset); 5360 Fts5Colset *pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&sParse.rc, n); 5361 if( pColset ){ 5362 pColset->nCol = 1; 5363 pColset->aiCol[0] = iCol; 5364 sqlite3Fts5ParseSetColset(&sParse, sParse.pExpr, pColset); 5365 } 5366 } 5367 5368 assert( sParse.rc!=SQLITE_OK || sParse.zErr==0 ); 5369 if( sParse.rc==SQLITE_OK ){ 5370 *ppNew = pNew = sqlite3_malloc(sizeof(Fts5Expr)); 5371 if( pNew==0 ){ 5372 sParse.rc = SQLITE_NOMEM; 5373 sqlite3Fts5ParseNodeFree(sParse.pExpr); 5374 }else{ 5375 if( !sParse.pExpr ){ 5376 const int nByte = sizeof(Fts5ExprNode); 5377 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&sParse.rc, nByte); 5378 if( pNew->pRoot ){ 5379 pNew->pRoot->bEof = 1; 5380 } 5381 }else{ 5382 pNew->pRoot = sParse.pExpr; 5383 } 5384 pNew->pIndex = 0; 5385 pNew->pConfig = pConfig; 5386 pNew->apExprPhrase = sParse.apPhrase; 5387 pNew->nPhrase = sParse.nPhrase; 5388 pNew->bDesc = 0; 5389 sParse.apPhrase = 0; 5390 } 5391 }else{ 5392 sqlite3Fts5ParseNodeFree(sParse.pExpr); 5393 } 5394 5395 sqlite3_free(sParse.apPhrase); 5396 *pzErr = sParse.zErr; 5397 return 
sParse.rc; 5398 } 5399 5400 /* 5401 ** This function is only called when using the special 'trigram' tokenizer. 5402 ** Argument zText contains the text of a LIKE or GLOB pattern matched 5403 ** against column iCol. This function creates and compiles an FTS5 MATCH 5404 ** expression that will match a superset of the rows matched by the LIKE or 5405 ** GLOB. If successful, SQLITE_OK is returned. Otherwise, an SQLite error 5406 ** code. 5407 */ 5408 static int sqlite3Fts5ExprPattern( 5409 Fts5Config *pConfig, int bGlob, int iCol, const char *zText, Fts5Expr **pp 5410 ){ 5411 i64 nText = strlen(zText); 5412 char *zExpr = (char*)sqlite3_malloc64(nText*4 + 1); 5413 int rc = SQLITE_OK; 5414 5415 if( zExpr==0 ){ 5416 rc = SQLITE_NOMEM; 5417 }else{ 5418 char aSpec[3]; 5419 int iOut = 0; 5420 int i = 0; 5421 int iFirst = 0; 5422 5423 if( bGlob==0 ){ 5424 aSpec[0] = '_'; 5425 aSpec[1] = '%'; 5426 aSpec[2] = 0; 5427 }else{ 5428 aSpec[0] = '*'; 5429 aSpec[1] = '?'; 5430 aSpec[2] = '['; 5431 } 5432 5433 while( i<=nText ){ 5434 if( i==nText 5435 || zText[i]==aSpec[0] || zText[i]==aSpec[1] || zText[i]==aSpec[2] 5436 ){ 5437 if( i-iFirst>=3 ){ 5438 int jj; 5439 zExpr[iOut++] = '"'; 5440 for(jj=iFirst; jj<i; jj++){ 5441 zExpr[iOut++] = zText[jj]; 5442 if( zText[jj]=='"' ) zExpr[iOut++] = '"'; 5443 } 5444 zExpr[iOut++] = '"'; 5445 zExpr[iOut++] = ' '; 5446 } 5447 if( zText[i]==aSpec[2] ){ 5448 i += 2; 5449 if( zText[i-1]=='^' ) i++; 5450 while( i<nText && zText[i]!=']' ) i++; 5451 } 5452 iFirst = i+1; 5453 } 5454 i++; 5455 } 5456 if( iOut>0 ){ 5457 int bAnd = 0; 5458 if( pConfig->eDetail!=FTS5_DETAIL_FULL ){ 5459 bAnd = 1; 5460 if( pConfig->eDetail==FTS5_DETAIL_NONE ){ 5461 iCol = pConfig->nCol; 5462 } 5463 } 5464 zExpr[iOut] = '\0'; 5465 rc = sqlite3Fts5ExprNew(pConfig, bAnd, iCol, zExpr, pp,pConfig->pzErrmsg); 5466 }else{ 5467 *pp = 0; 5468 } 5469 sqlite3_free(zExpr); 5470 } 5471 5472 return rc; 5473 } 5474 5475 /* 5476 ** Free the expression node object passed as the only 
argument. 5477 */ 5478 static void sqlite3Fts5ParseNodeFree(Fts5ExprNode *p){ 5479 if( p ){ 5480 int i; 5481 for(i=0; i<p->nChild; i++){ 5482 sqlite3Fts5ParseNodeFree(p->apChild[i]); 5483 } 5484 sqlite3Fts5ParseNearsetFree(p->pNear); 5485 sqlite3_free(p); 5486 } 5487 } 5488 5489 /* 5490 ** Free the expression object passed as the only argument. 5491 */ 5492 static void sqlite3Fts5ExprFree(Fts5Expr *p){ 5493 if( p ){ 5494 sqlite3Fts5ParseNodeFree(p->pRoot); 5495 sqlite3_free(p->apExprPhrase); 5496 sqlite3_free(p); 5497 } 5498 } 5499 5500 static int sqlite3Fts5ExprAnd(Fts5Expr **pp1, Fts5Expr *p2){ 5501 Fts5Parse sParse; 5502 memset(&sParse, 0, sizeof(sParse)); 5503 5504 if( *pp1 ){ 5505 Fts5Expr *p1 = *pp1; 5506 int nPhrase = p1->nPhrase + p2->nPhrase; 5507 5508 p1->pRoot = sqlite3Fts5ParseNode(&sParse, FTS5_AND, p1->pRoot, p2->pRoot,0); 5509 p2->pRoot = 0; 5510 5511 if( sParse.rc==SQLITE_OK ){ 5512 Fts5ExprPhrase **ap = (Fts5ExprPhrase**)sqlite3_realloc( 5513 p1->apExprPhrase, nPhrase * sizeof(Fts5ExprPhrase*) 5514 ); 5515 if( ap==0 ){ 5516 sParse.rc = SQLITE_NOMEM; 5517 }else{ 5518 int i; 5519 memmove(&ap[p2->nPhrase], ap, p1->nPhrase*sizeof(Fts5ExprPhrase*)); 5520 for(i=0; i<p2->nPhrase; i++){ 5521 ap[i] = p2->apExprPhrase[i]; 5522 } 5523 p1->nPhrase = nPhrase; 5524 p1->apExprPhrase = ap; 5525 } 5526 } 5527 sqlite3_free(p2->apExprPhrase); 5528 sqlite3_free(p2); 5529 }else{ 5530 *pp1 = p2; 5531 } 5532 5533 return sParse.rc; 5534 } 5535 5536 /* 5537 ** Argument pTerm must be a synonym iterator. Return the current rowid 5538 ** that it points to. 
5539 */ 5540 static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ 5541 i64 iRet = 0; 5542 int bRetValid = 0; 5543 Fts5ExprTerm *p; 5544 5545 assert( pTerm ); 5546 assert( pTerm->pSynonym ); 5547 assert( bDesc==0 || bDesc==1 ); 5548 for(p=pTerm; p; p=p->pSynonym){ 5549 if( 0==sqlite3Fts5IterEof(p->pIter) ){ 5550 i64 iRowid = p->pIter->iRowid; 5551 if( bRetValid==0 || (bDesc!=(iRowid<iRet)) ){ 5552 iRet = iRowid; 5553 bRetValid = 1; 5554 } 5555 } 5556 } 5557 5558 if( pbEof && bRetValid==0 ) *pbEof = 1; 5559 return iRet; 5560 } 5561 5562 /* 5563 ** Argument pTerm must be a synonym iterator. 5564 */ 5565 static int fts5ExprSynonymList( 5566 Fts5ExprTerm *pTerm, 5567 i64 iRowid, 5568 Fts5Buffer *pBuf, /* Use this buffer for space if required */ 5569 u8 **pa, int *pn 5570 ){ 5571 Fts5PoslistReader aStatic[4]; 5572 Fts5PoslistReader *aIter = aStatic; 5573 int nIter = 0; 5574 int nAlloc = 4; 5575 int rc = SQLITE_OK; 5576 Fts5ExprTerm *p; 5577 5578 assert( pTerm->pSynonym ); 5579 for(p=pTerm; p; p=p->pSynonym){ 5580 Fts5IndexIter *pIter = p->pIter; 5581 if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){ 5582 if( pIter->nData==0 ) continue; 5583 if( nIter==nAlloc ){ 5584 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; 5585 Fts5PoslistReader *aNew = (Fts5PoslistReader*)sqlite3_malloc64(nByte); 5586 if( aNew==0 ){ 5587 rc = SQLITE_NOMEM; 5588 goto synonym_poslist_out; 5589 } 5590 memcpy(aNew, aIter, sizeof(Fts5PoslistReader) * nIter); 5591 nAlloc = nAlloc*2; 5592 if( aIter!=aStatic ) sqlite3_free(aIter); 5593 aIter = aNew; 5594 } 5595 sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &aIter[nIter]); 5596 assert( aIter[nIter].bEof==0 ); 5597 nIter++; 5598 } 5599 } 5600 5601 if( nIter==1 ){ 5602 *pa = (u8*)aIter[0].a; 5603 *pn = aIter[0].n; 5604 }else{ 5605 Fts5PoslistWriter writer = {0}; 5606 i64 iPrev = -1; 5607 fts5BufferZero(pBuf); 5608 while( 1 ){ 5609 int i; 5610 i64 iMin = FTS5_LARGEST_INT64; 5611 for(i=0; i<nIter; 
i++){ 5612 if( aIter[i].bEof==0 ){ 5613 if( aIter[i].iPos==iPrev ){ 5614 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) continue; 5615 } 5616 if( aIter[i].iPos<iMin ){ 5617 iMin = aIter[i].iPos; 5618 } 5619 } 5620 } 5621 if( iMin==FTS5_LARGEST_INT64 || rc!=SQLITE_OK ) break; 5622 rc = sqlite3Fts5PoslistWriterAppend(pBuf, &writer, iMin); 5623 iPrev = iMin; 5624 } 5625 if( rc==SQLITE_OK ){ 5626 *pa = pBuf->p; 5627 *pn = pBuf->n; 5628 } 5629 } 5630 5631 synonym_poslist_out: 5632 if( aIter!=aStatic ) sqlite3_free(aIter); 5633 return rc; 5634 } 5635 5636 5637 /* 5638 ** All individual term iterators in pPhrase are guaranteed to be valid and 5639 ** pointing to the same rowid when this function is called. This function 5640 ** checks if the current rowid really is a match, and if so populates 5641 ** the pPhrase->poslist buffer accordingly. Output parameter *pbMatch 5642 ** is set to true if this is really a match, or false otherwise. 5643 ** 5644 ** SQLITE_OK is returned if an error occurs, or an SQLite error code 5645 ** otherwise. It is not considered an error code if the current rowid is 5646 ** not a match. 5647 */ 5648 static int fts5ExprPhraseIsMatch( 5649 Fts5ExprNode *pNode, /* Node pPhrase belongs to */ 5650 Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ 5651 int *pbMatch /* OUT: Set to true if really a match */ 5652 ){ 5653 Fts5PoslistWriter writer = {0}; 5654 Fts5PoslistReader aStatic[4]; 5655 Fts5PoslistReader *aIter = aStatic; 5656 int i; 5657 int rc = SQLITE_OK; 5658 int bFirst = pPhrase->aTerm[0].bFirst; 5659 5660 fts5BufferZero(&pPhrase->poslist); 5661 5662 /* If the aStatic[] array is not large enough, allocate a large array 5663 ** using sqlite3_malloc(). This approach could be improved upon. 
*/ 5664 if( pPhrase->nTerm>ArraySize(aStatic) ){ 5665 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * pPhrase->nTerm; 5666 aIter = (Fts5PoslistReader*)sqlite3_malloc64(nByte); 5667 if( !aIter ) return SQLITE_NOMEM; 5668 } 5669 memset(aIter, 0, sizeof(Fts5PoslistReader) * pPhrase->nTerm); 5670 5671 /* Initialize a term iterator for each term in the phrase */ 5672 for(i=0; i<pPhrase->nTerm; i++){ 5673 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; 5674 int n = 0; 5675 int bFlag = 0; 5676 u8 *a = 0; 5677 if( pTerm->pSynonym ){ 5678 Fts5Buffer buf = {0, 0, 0}; 5679 rc = fts5ExprSynonymList(pTerm, pNode->iRowid, &buf, &a, &n); 5680 if( rc ){ 5681 sqlite3_free(a); 5682 goto ismatch_out; 5683 } 5684 if( a==buf.p ) bFlag = 1; 5685 }else{ 5686 a = (u8*)pTerm->pIter->pData; 5687 n = pTerm->pIter->nData; 5688 } 5689 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); 5690 aIter[i].bFlag = (u8)bFlag; 5691 if( aIter[i].bEof ) goto ismatch_out; 5692 } 5693 5694 while( 1 ){ 5695 int bMatch; 5696 i64 iPos = aIter[0].iPos; 5697 do { 5698 bMatch = 1; 5699 for(i=0; i<pPhrase->nTerm; i++){ 5700 Fts5PoslistReader *pPos = &aIter[i]; 5701 i64 iAdj = iPos + i; 5702 if( pPos->iPos!=iAdj ){ 5703 bMatch = 0; 5704 while( pPos->iPos<iAdj ){ 5705 if( sqlite3Fts5PoslistReaderNext(pPos) ) goto ismatch_out; 5706 } 5707 if( pPos->iPos>iAdj ) iPos = pPos->iPos-i; 5708 } 5709 } 5710 }while( bMatch==0 ); 5711 5712 /* Append position iPos to the output */ 5713 if( bFirst==0 || FTS5_POS2OFFSET(iPos)==0 ){ 5714 rc = sqlite3Fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); 5715 if( rc!=SQLITE_OK ) goto ismatch_out; 5716 } 5717 5718 for(i=0; i<pPhrase->nTerm; i++){ 5719 if( sqlite3Fts5PoslistReaderNext(&aIter[i]) ) goto ismatch_out; 5720 } 5721 } 5722 5723 ismatch_out: 5724 *pbMatch = (pPhrase->poslist.n>0); 5725 for(i=0; i<pPhrase->nTerm; i++){ 5726 if( aIter[i].bFlag ) sqlite3_free((u8*)aIter[i].a); 5727 } 5728 if( aIter!=aStatic ) sqlite3_free(aIter); 5729 return rc; 5730 } 5731 5732 typedef struct 
Fts5LookaheadReader Fts5LookaheadReader; 5733 struct Fts5LookaheadReader { 5734 const u8 *a; /* Buffer containing position list */ 5735 int n; /* Size of buffer a[] in bytes */ 5736 int i; /* Current offset in position list */ 5737 i64 iPos; /* Current position */ 5738 i64 iLookahead; /* Next position */ 5739 }; 5740 5741 #define FTS5_LOOKAHEAD_EOF (((i64)1) << 62) 5742 5743 static int fts5LookaheadReaderNext(Fts5LookaheadReader *p){ 5744 p->iPos = p->iLookahead; 5745 if( sqlite3Fts5PoslistNext64(p->a, p->n, &p->i, &p->iLookahead) ){ 5746 p->iLookahead = FTS5_LOOKAHEAD_EOF; 5747 } 5748 return (p->iPos==FTS5_LOOKAHEAD_EOF); 5749 } 5750 5751 static int fts5LookaheadReaderInit( 5752 const u8 *a, int n, /* Buffer to read position list from */ 5753 Fts5LookaheadReader *p /* Iterator object to initialize */ 5754 ){ 5755 memset(p, 0, sizeof(Fts5LookaheadReader)); 5756 p->a = a; 5757 p->n = n; 5758 fts5LookaheadReaderNext(p); 5759 return fts5LookaheadReaderNext(p); 5760 } 5761 5762 typedef struct Fts5NearTrimmer Fts5NearTrimmer; 5763 struct Fts5NearTrimmer { 5764 Fts5LookaheadReader reader; /* Input iterator */ 5765 Fts5PoslistWriter writer; /* Writer context */ 5766 Fts5Buffer *pOut; /* Output poslist */ 5767 }; 5768 5769 /* 5770 ** The near-set object passed as the first argument contains more than 5771 ** one phrase. All phrases currently point to the same row. The 5772 ** Fts5ExprPhrase.poslist buffers are populated accordingly. This function 5773 ** tests if the current row contains instances of each phrase sufficiently 5774 ** close together to meet the NEAR constraint. Non-zero is returned if it 5775 ** does, or zero otherwise. 5776 ** 5777 ** If in/out parameter (*pRc) is set to other than SQLITE_OK when this 5778 ** function is called, it is a no-op. Or, if an error (e.g. SQLITE_NOMEM) 5779 ** occurs within this function (*pRc) is set accordingly before returning. 5780 ** The return value is undefined in both these cases. 
5781 ** 5782 ** If no error occurs and non-zero (a match) is returned, the position-list 5783 ** of each phrase object is edited to contain only those entries that 5784 ** meet the constraint before returning. 5785 */ 5786 static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ 5787 Fts5NearTrimmer aStatic[4]; 5788 Fts5NearTrimmer *a = aStatic; 5789 Fts5ExprPhrase **apPhrase = pNear->apPhrase; 5790 5791 int i; 5792 int rc = *pRc; 5793 int bMatch; 5794 5795 assert( pNear->nPhrase>1 ); 5796 5797 /* If the aStatic[] array is not large enough, allocate a larger, zeroed 5798 ** array using sqlite3Fts5MallocZero(). Otherwise, zero aStatic[] in place. ** This approach could be improved upon. */ 5799 if( pNear->nPhrase>ArraySize(aStatic) ){ 5800 sqlite3_int64 nByte = sizeof(Fts5NearTrimmer) * pNear->nPhrase; 5801 a = (Fts5NearTrimmer*)sqlite3Fts5MallocZero(&rc, nByte); 5802 }else{ 5803 memset(aStatic, 0, sizeof(aStatic)); 5804 } /* This also covers the case where *pRc was already an error on entry, ** as rc was initialized from *pRc above. */ 5805 if( rc!=SQLITE_OK ){ 5806 *pRc = rc; 5807 return 0; 5808 } 5809 5810 /* Initialize a lookahead iterator for each phrase. After passing the 5811 ** buffer and buffer size to the lookahead-reader init function, zero 5812 ** the phrase poslist buffer. The new poslist for the phrase (containing 5813 ** the same entries as the original with some entries removed on account 5814 ** of the NEAR constraint) is written over the original even as it is 5815 ** being read. This is safe as the entries for the new poslist are a 5816 ** subset of the old, so it is not possible for data yet to be read to 5817 ** be overwritten. */ 5818 for(i=0; i<pNear->nPhrase; i++){ 5819 Fts5Buffer *pPoslist = &apPhrase[i]->poslist; 5820 fts5LookaheadReaderInit(pPoslist->p, pPoslist->n, &a[i].reader); 5821 pPoslist->n = 0; 5822 a[i].pOut = pPoslist; 5823 } 5824 5825 while( 1 ){ 5826 int iAdv; 5827 i64 iMin; 5828 i64 iMax; 5829 5830 /* This block advances the phrase iterators until they point to a set of 5831 ** entries that together comprise a match. 
*/ 5832 iMax = a[0].reader.iPos; 5833 do { 5834 bMatch = 1; 5835 for(i=0; i<pNear->nPhrase; i++){ 5836 Fts5LookaheadReader *pPos = &a[i].reader; 5837 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; 5838 if( pPos->iPos<iMin || pPos->iPos>iMax ){ 5839 bMatch = 0; 5840 while( pPos->iPos<iMin ){ 5841 if( fts5LookaheadReaderNext(pPos) ) goto ismatch_out; 5842 } 5843 if( pPos->iPos>iMax ) iMax = pPos->iPos; 5844 } 5845 } 5846 }while( bMatch==0 ); 5847 5848 /* Add an entry to each output position list */ 5849 for(i=0; i<pNear->nPhrase; i++){ 5850 i64 iPos = a[i].reader.iPos; 5851 Fts5PoslistWriter *pWriter = &a[i].writer; 5852 if( a[i].pOut->n==0 || iPos!=pWriter->iPrev ){ 5853 sqlite3Fts5PoslistWriterAppend(a[i].pOut, pWriter, iPos); 5854 } 5855 } 5856 5857 iAdv = 0; 5858 iMin = a[0].reader.iLookahead; 5859 for(i=0; i<pNear->nPhrase; i++){ 5860 if( a[i].reader.iLookahead < iMin ){ 5861 iMin = a[i].reader.iLookahead; 5862 iAdv = i; 5863 } 5864 } 5865 if( fts5LookaheadReaderNext(&a[iAdv].reader) ) goto ismatch_out; 5866 } 5867 5868 ismatch_out: { 5869 int bRet = a[0].pOut->n>0; 5870 *pRc = rc; 5871 if( a!=aStatic ) sqlite3_free(a); 5872 return bRet; 5873 } 5874 } 5875 5876 /* 5877 ** Advance iterator pIter until it points to a value equal to or laster 5878 ** than the initial value of *piLast. If this means the iterator points 5879 ** to a value laster than *piLast, update *piLast to the new lastest value. 5880 ** 5881 ** If the iterator reaches EOF, set *pbEof to true before returning. If 5882 ** an error occurs, set *pRc to an error code. If either *pbEof or *pRc 5883 ** are set, return a non-zero value. Otherwise, return zero. 
5884 */ 5885 static int fts5ExprAdvanceto( 5886 Fts5IndexIter *pIter, /* Iterator to advance */ 5887 int bDesc, /* True if iterator is "rowid DESC" */ 5888 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ 5889 int *pRc, /* OUT: Error code */ 5890 int *pbEof /* OUT: Set to true if EOF */ 5891 ){ 5892 i64 iLast = *piLast; 5893 i64 iRowid; 5894 5895 iRowid = pIter->iRowid; 5896 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ 5897 int rc = sqlite3Fts5IterNextFrom(pIter, iLast); 5898 if( rc || sqlite3Fts5IterEof(pIter) ){ 5899 *pRc = rc; 5900 *pbEof = 1; 5901 return 1; 5902 } 5903 iRowid = pIter->iRowid; 5904 assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); 5905 } 5906 *piLast = iRowid; 5907 5908 return 0; 5909 } 5910 5911 static int fts5ExprSynonymAdvanceto( 5912 Fts5ExprTerm *pTerm, /* Term iterator to advance */ 5913 int bDesc, /* True if iterator is "rowid DESC" */ 5914 i64 *piLast, /* IN/OUT: Lastest rowid seen so far */ 5915 int *pRc /* OUT: Error code */ 5916 ){ 5917 int rc = SQLITE_OK; 5918 i64 iLast = *piLast; 5919 Fts5ExprTerm *p; 5920 int bEof = 0; 5921 5922 for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ 5923 if( sqlite3Fts5IterEof(p->pIter)==0 ){ 5924 i64 iRowid = p->pIter->iRowid; 5925 if( (bDesc==0 && iLast>iRowid) || (bDesc && iLast<iRowid) ){ 5926 rc = sqlite3Fts5IterNextFrom(p->pIter, iLast); 5927 } 5928 } 5929 } 5930 5931 if( rc!=SQLITE_OK ){ 5932 *pRc = rc; 5933 bEof = 1; 5934 }else{ 5935 *piLast = fts5ExprSynonymRowid(pTerm, bDesc, &bEof); 5936 } 5937 return bEof; 5938 } 5939 5940 5941 static int fts5ExprNearTest( 5942 int *pRc, 5943 Fts5Expr *pExpr, /* Expression that pNear is a part of */ 5944 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_STRING) */ 5945 ){ 5946 Fts5ExprNearset *pNear = pNode->pNear; 5947 int rc = *pRc; 5948 5949 if( pExpr->pConfig->eDetail!=FTS5_DETAIL_FULL ){ 5950 Fts5ExprTerm *pTerm; 5951 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; 5952 pPhrase->poslist.n = 0; 5953 
for(pTerm=&pPhrase->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ 5954 Fts5IndexIter *pIter = pTerm->pIter; 5955 if( sqlite3Fts5IterEof(pIter)==0 ){ 5956 if( pIter->iRowid==pNode->iRowid && pIter->nData>0 ){ 5957 pPhrase->poslist.n = 1; 5958 } 5959 } 5960 } 5961 return pPhrase->poslist.n; 5962 }else{ 5963 int i; 5964 5965 /* Check that each phrase in the nearset matches the current row. 5966 ** Populate the pPhrase->poslist buffers at the same time. If any 5967 ** phrase is not a match, break out of the loop early. */ 5968 for(i=0; rc==SQLITE_OK && i<pNear->nPhrase; i++){ 5969 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; 5970 if( pPhrase->nTerm>1 || pPhrase->aTerm[0].pSynonym 5971 || pNear->pColset || pPhrase->aTerm[0].bFirst 5972 ){ 5973 int bMatch = 0; 5974 rc = fts5ExprPhraseIsMatch(pNode, pPhrase, &bMatch); 5975 if( bMatch==0 ) break; 5976 }else{ 5977 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; 5978 fts5BufferSet(&rc, &pPhrase->poslist, pIter->nData, pIter->pData); 5979 } 5980 } 5981 5982 *pRc = rc; 5983 if( i==pNear->nPhrase && (i==1 || fts5ExprNearIsMatch(pRc, pNear)) ){ 5984 return 1; 5985 } 5986 return 0; 5987 } 5988 } 5989 5990 5991 /* 5992 ** Initialize all term iterators in the pNear object. If any term is found 5993 ** to match no documents at all, return immediately without initializing any 5994 ** further iterators. 5995 ** 5996 ** If an error occurs, return an SQLite error code. Otherwise, return 5997 ** SQLITE_OK. It is not considered an error if some term matches zero 5998 ** documents. 
5999 */ 6000 static int fts5ExprNearInitAll( 6001 Fts5Expr *pExpr, 6002 Fts5ExprNode *pNode 6003 ){ 6004 Fts5ExprNearset *pNear = pNode->pNear; 6005 int i; 6006 6007 assert( pNode->bNomatch==0 ); 6008 for(i=0; i<pNear->nPhrase; i++){ 6009 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; 6010 if( pPhrase->nTerm==0 ){ 6011 pNode->bEof = 1; 6012 return SQLITE_OK; 6013 }else{ 6014 int j; 6015 for(j=0; j<pPhrase->nTerm; j++){ 6016 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; 6017 Fts5ExprTerm *p; 6018 int bHit = 0; 6019 6020 for(p=pTerm; p; p=p->pSynonym){ 6021 int rc; 6022 if( p->pIter ){ 6023 sqlite3Fts5IterClose(p->pIter); 6024 p->pIter = 0; 6025 } 6026 rc = sqlite3Fts5IndexQuery( 6027 pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), 6028 (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | 6029 (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), 6030 pNear->pColset, 6031 &p->pIter 6032 ); 6033 assert( (rc==SQLITE_OK)==(p->pIter!=0) ); 6034 if( rc!=SQLITE_OK ) return rc; 6035 if( 0==sqlite3Fts5IterEof(p->pIter) ){ 6036 bHit = 1; 6037 } 6038 } 6039 6040 if( bHit==0 ){ 6041 pNode->bEof = 1; 6042 return SQLITE_OK; 6043 } 6044 } 6045 } 6046 } 6047 6048 pNode->bEof = 0; 6049 return SQLITE_OK; 6050 } 6051 6052 /* 6053 ** If pExpr is an ASC iterator, this function returns a value with the 6054 ** same sign as: 6055 ** 6056 ** (iLhs - iRhs) 6057 ** 6058 ** Otherwise, if this is a DESC iterator, the opposite is returned: 6059 ** 6060 ** (iRhs - iLhs) 6061 */ 6062 static int fts5RowidCmp( 6063 Fts5Expr *pExpr, 6064 i64 iLhs, 6065 i64 iRhs 6066 ){ 6067 assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); 6068 if( pExpr->bDesc==0 ){ 6069 if( iLhs<iRhs ) return -1; 6070 return (iLhs > iRhs); 6071 }else{ 6072 if( iLhs>iRhs ) return -1; 6073 return (iLhs < iRhs); 6074 } 6075 } 6076 6077 static void fts5ExprSetEof(Fts5ExprNode *pNode){ 6078 int i; 6079 pNode->bEof = 1; 6080 pNode->bNomatch = 0; 6081 for(i=0; i<pNode->nChild; i++){ 6082 fts5ExprSetEof(pNode->apChild[i]); 6083 } 6084 } 6085 6086 static void 
fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ 6087 if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ 6088 Fts5ExprNearset *pNear = pNode->pNear; 6089 int i; 6090 for(i=0; i<pNear->nPhrase; i++){ 6091 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; 6092 pPhrase->poslist.n = 0; 6093 } 6094 }else{ 6095 int i; 6096 for(i=0; i<pNode->nChild; i++){ 6097 fts5ExprNodeZeroPoslist(pNode->apChild[i]); 6098 } 6099 } 6100 } 6101 6102 6103 6104 /* 6105 ** Compare the values currently indicated by the two nodes as follows: 6106 ** 6107 ** res = (*p1) - (*p2) 6108 ** 6109 ** Nodes that point to values that come later in the iteration order are 6110 ** considered to be larger. Nodes at EOF are the largest of all. If p2 is ** at EOF, -1 is returned regardless of the state of p1. 6111 ** 6112 ** This means that if the iteration order is ASC, then numerically larger 6113 ** rowids are considered larger. Or if it is DESC, numerically 6114 ** smaller rowids are larger. 6115 */ 6116 static int fts5NodeCompare( 6117 Fts5Expr *pExpr, 6118 Fts5ExprNode *p1, 6119 Fts5ExprNode *p2 6120 ){ 6121 if( p2->bEof ) return -1; 6122 if( p1->bEof ) return +1; 6123 return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); 6124 } 6125 6126 /* 6127 ** All individual term iterators in pNode->pNear are guaranteed to be valid when 6128 ** this function is called. This function checks if all term iterators 6129 ** point to the same rowid, and if not, advances them until they do. 6130 ** If an EOF is reached before this happens, pNode->bEof is set to true before 6131 ** returning. Otherwise, pNode->iRowid is set to the common rowid and ** pNode->bNomatch is set according to the result of fts5ExprNearTest(). 6132 ** 6133 ** SQLITE_OK is returned if no error occurs, or an SQLite error code 6134 ** otherwise. It is not considered an error if an iterator reaches 6135 ** EOF. 
6136 */ 6137 static int fts5ExprNodeTest_STRING( 6138 Fts5Expr *pExpr, /* Expression pNode belongs to */ 6139 Fts5ExprNode *pNode /* Node to test (must not qualify as FTS5_TERM) */ 6140 ){ 6141 Fts5ExprNearset *pNear = pNode->pNear; 6142 Fts5ExprPhrase *pLeft = pNear->apPhrase[0]; 6143 int rc = SQLITE_OK; 6144 i64 iLast; /* Lastest rowid any iterator points to */ 6145 int i, j; /* Phrase and token index, respectively */ 6146 int bMatch; /* True if all terms are at the same rowid */ 6147 const int bDesc = pExpr->bDesc; 6148 6149 /* Check that this node should not be FTS5_TERM */ 6150 assert( pNear->nPhrase>1 6151 || pNear->apPhrase[0]->nTerm>1 6152 || pNear->apPhrase[0]->aTerm[0].pSynonym 6153 || pNear->apPhrase[0]->aTerm[0].bFirst 6154 ); 6155 6156 /* Initialize iLast, the "lastest" rowid any iterator points to. If the 6157 ** iterator skips through rowids in the default ascending order, this means 6158 ** the maximum rowid. Or, if the iterator is "ORDER BY rowid DESC", then it 6159 ** means the minimum rowid. */ 6160 if( pLeft->aTerm[0].pSynonym ){ 6161 iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); 6162 }else{ 6163 iLast = pLeft->aTerm[0].pIter->iRowid; 6164 } 6165 /* Repeat full passes over every term of every phrase until one pass ** completes without having to advance any iterator. */ 6166 do { 6167 bMatch = 1; 6168 for(i=0; i<pNear->nPhrase; i++){ 6169 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; 6170 for(j=0; j<pPhrase->nTerm; j++){ 6171 Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; 6172 if( pTerm->pSynonym ){ 6173 i64 iRowid = fts5ExprSynonymRowid(pTerm, bDesc, 0); 6174 if( iRowid==iLast ) continue; 6175 bMatch = 0; 6176 if( fts5ExprSynonymAdvanceto(pTerm, bDesc, &iLast, &rc) ){ 6177 pNode->bNomatch = 0; 6178 pNode->bEof = 1; 6179 return rc; 6180 } 6181 }else{ 6182 Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; 6183 if( pIter->iRowid==iLast || pIter->bEof ) continue; 6184 bMatch = 0; 6185 if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ 6186 return rc; 6187 } 6188 } 6189 } 6190 } 6191 }while( bMatch==0 ); 6192 6193 pNode->iRowid = iLast; 6194 pNode->bNomatch = ((0==fts5ExprNearTest(&rc, pExpr, pNode))
&& rc==SQLITE_OK); 6195 assert( pNode->bEof==0 || pNode->bNomatch==0 ); 6196 6197 return rc; 6198 } 6199 6200 /* 6201 ** Advance the first term iterator (or its synonym iterators) in the first ** phrase of pNode->pNear. Set 6202 ** pNode->bEof to true if it reaches EOF or if an error occurs. If the ** node is not at EOF afterwards, fts5ExprNodeTest_STRING() is invoked on it. 6203 ** 6204 ** Return SQLITE_OK if successful, or an SQLite error code if an error 6205 ** occurs. 6206 */ 6207 static int fts5ExprNodeNext_STRING( 6208 Fts5Expr *pExpr, /* Expression pNode belongs to */ 6209 Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ 6210 int bFromValid, /* If true, advance using sqlite3Fts5IterNextFrom(iFrom) */ 6211 i64 iFrom /* Target rowid; only used if bFromValid is true */ 6212 ){ 6213 Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; 6214 int rc = SQLITE_OK; 6215 6216 pNode->bNomatch = 0; 6217 if( pTerm->pSynonym ){ 6218 int bEof = 1; 6219 Fts5ExprTerm *p; 6220 6221 /* Find the firstest rowid any synonym points to. */ 6222 i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); 6223 6224 /* Advance each iterator that currently points to iRowid. Or, if iFrom 6225 ** is valid - each iterator that points to a rowid before iFrom. bEof ** remains set only if no synonym iterator is left pointing at a row. */ 6226 for(p=pTerm; p; p=p->pSynonym){ 6227 if( sqlite3Fts5IterEof(p->pIter)==0 ){ 6228 i64 ii = p->pIter->iRowid; 6229 if( ii==iRowid 6230 || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) 6231 ){ 6232 if( bFromValid ){ 6233 rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); 6234 }else{ 6235 rc = sqlite3Fts5IterNext(p->pIter); 6236 } 6237 if( rc!=SQLITE_OK ) break; 6238 if( sqlite3Fts5IterEof(p->pIter)==0 ){ 6239 bEof = 0; 6240 } 6241 }else{ 6242 bEof = 0; 6243 } 6244 } 6245 } 6246 6247 /* Set the EOF flag if either all synonym iterators are at EOF or an 6248 ** error has occurred. 
*/ 6249 pNode->bEof = (rc || bEof); 6250 }else{ 6251 Fts5IndexIter *pIter = pTerm->pIter; 6252 6253 assert( Fts5NodeIsString(pNode) ); 6254 if( bFromValid ){ 6255 rc = sqlite3Fts5IterNextFrom(pIter, iFrom); 6256 }else{ 6257 rc = sqlite3Fts5IterNext(pIter); 6258 } 6259 6260 pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); 6261 } 6262 6263 if( pNode->bEof==0 ){ 6264 assert( rc==SQLITE_OK ); 6265 rc = fts5ExprNodeTest_STRING(pExpr, pNode); 6266 } 6267 6268 return rc; 6269 } 6270 6271 6272 static int fts5ExprNodeTest_TERM( 6273 Fts5Expr *pExpr, /* Expression that pNear is a part of */ 6274 Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ 6275 ){ 6276 /* As this "NEAR" object is actually a single phrase that consists 6277 ** of a single term only, grab pointers into the poslist managed by the 6278 ** fts5_index.c iterator object. This is much faster than synthesizing 6279 ** a new poslist the way we have to for more complicated phrase or NEAR 6280 ** expressions. */ 6281 Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; 6282 Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; 6283 6284 assert( pNode->eType==FTS5_TERM ); 6285 assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); 6286 assert( pPhrase->aTerm[0].pSynonym==0 ); 6287 6288 pPhrase->poslist.n = pIter->nData; 6289 if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ 6290 pPhrase->poslist.p = (u8*)pIter->pData; 6291 } 6292 pNode->iRowid = pIter->iRowid; 6293 pNode->bNomatch = (pPhrase->poslist.n==0); 6294 return SQLITE_OK; 6295 } 6296 6297 /* 6298 ** xNext() method for a node of type FTS5_TERM. 
6299 */ 6300 static int fts5ExprNodeNext_TERM( 6301 Fts5Expr *pExpr, 6302 Fts5ExprNode *pNode, 6303 int bFromValid, 6304 i64 iFrom 6305 ){ 6306 int rc; 6307 Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; 6308 6309 assert( pNode->bEof==0 ); 6310 if( bFromValid ){ 6311 rc = sqlite3Fts5IterNextFrom(pIter, iFrom); 6312 }else{ 6313 rc = sqlite3Fts5IterNext(pIter); 6314 } 6315 if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ 6316 rc = fts5ExprNodeTest_TERM(pExpr, pNode); 6317 }else{ 6318 pNode->bEof = 1; 6319 pNode->bNomatch = 0; 6320 } 6321 return rc; 6322 } 6323 6324 static void fts5ExprNodeTest_OR( 6325 Fts5Expr *pExpr, /* Expression of which pNode is a part */ 6326 Fts5ExprNode *pNode /* Expression node to test */ 6327 ){ 6328 Fts5ExprNode *pNext = pNode->apChild[0]; 6329 int i; 6330 6331 for(i=1; i<pNode->nChild; i++){ 6332 Fts5ExprNode *pChild = pNode->apChild[i]; 6333 int cmp = fts5NodeCompare(pExpr, pNext, pChild); 6334 if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ 6335 pNext = pChild; 6336 } 6337 } 6338 pNode->iRowid = pNext->iRowid; 6339 pNode->bEof = pNext->bEof; 6340 pNode->bNomatch = pNext->bNomatch; 6341 } 6342 6343 static int fts5ExprNodeNext_OR( 6344 Fts5Expr *pExpr, 6345 Fts5ExprNode *pNode, 6346 int bFromValid, 6347 i64 iFrom 6348 ){ 6349 int i; 6350 i64 iLast = pNode->iRowid; 6351 6352 for(i=0; i<pNode->nChild; i++){ 6353 Fts5ExprNode *p1 = pNode->apChild[i]; 6354 assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 ); 6355 if( p1->bEof==0 ){ 6356 if( (p1->iRowid==iLast) 6357 || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) 6358 ){ 6359 int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); 6360 if( rc!=SQLITE_OK ){ 6361 pNode->bNomatch = 0; 6362 return rc; 6363 } 6364 } 6365 } 6366 } 6367 6368 fts5ExprNodeTest_OR(pExpr, pNode); 6369 return SQLITE_OK; 6370 } 6371 6372 /* 6373 ** Argument pNode is an FTS5_AND node. 
6374 */ 6375 static int fts5ExprNodeTest_AND( 6376 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ 6377 Fts5ExprNode *pAnd /* FTS5_AND node to advance */ 6378 ){ 6379 int iChild; 6380 i64 iLast = pAnd->iRowid; 6381 int rc = SQLITE_OK; 6382 int bMatch; 6383 6384 assert( pAnd->bEof==0 ); 6385 do { 6386 pAnd->bNomatch = 0; 6387 bMatch = 1; 6388 for(iChild=0; iChild<pAnd->nChild; iChild++){ 6389 Fts5ExprNode *pChild = pAnd->apChild[iChild]; 6390 int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); 6391 if( cmp>0 ){ 6392 /* Advance pChild until it points to iLast or laster */ 6393 rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); 6394 if( rc!=SQLITE_OK ){ 6395 pAnd->bNomatch = 0; 6396 return rc; 6397 } 6398 } 6399 6400 /* If the child node is now at EOF, so is the parent AND node. Otherwise, 6401 ** the child node is guaranteed to have advanced at least as far as 6402 ** rowid iLast. So if it is not at exactly iLast, pChild->iRowid is the 6403 ** new lastest rowid seen so far. */ 6404 assert( pChild->bEof || fts5RowidCmp(pExpr, iLast, pChild->iRowid)<=0 ); 6405 if( pChild->bEof ){ 6406 fts5ExprSetEof(pAnd); 6407 bMatch = 1; 6408 break; 6409 }else if( iLast!=pChild->iRowid ){ 6410 bMatch = 0; 6411 iLast = pChild->iRowid; 6412 } 6413 6414 if( pChild->bNomatch ){ 6415 pAnd->bNomatch = 1; 6416 } 6417 } 6418 }while( bMatch==0 ); 6419 6420 if( pAnd->bNomatch && pAnd!=pExpr->pRoot ){ 6421 fts5ExprNodeZeroPoslist(pAnd); 6422 } 6423 pAnd->iRowid = iLast; 6424 return SQLITE_OK; 6425 } 6426 6427 static int fts5ExprNodeNext_AND( 6428 Fts5Expr *pExpr, 6429 Fts5ExprNode *pNode, 6430 int bFromValid, 6431 i64 iFrom 6432 ){ 6433 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); 6434 if( rc==SQLITE_OK ){ 6435 rc = fts5ExprNodeTest_AND(pExpr, pNode); 6436 }else{ 6437 pNode->bNomatch = 0; 6438 } 6439 return rc; 6440 } 6441 6442 static int fts5ExprNodeTest_NOT( 6443 Fts5Expr *pExpr, /* Expression pPhrase belongs to */ 6444 Fts5ExprNode *pNode /* FTS5_NOT node to 
advance */ 6445 ){ 6446 int rc = SQLITE_OK; 6447 Fts5ExprNode *p1 = pNode->apChild[0]; 6448 Fts5ExprNode *p2 = pNode->apChild[1]; 6449 assert( pNode->nChild==2 ); 6450 6451 while( rc==SQLITE_OK && p1->bEof==0 ){ 6452 int cmp = fts5NodeCompare(pExpr, p1, p2); 6453 if( cmp>0 ){ 6454 rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); 6455 cmp = fts5NodeCompare(pExpr, p1, p2); 6456 } 6457 assert( rc!=SQLITE_OK || cmp<=0 ); 6458 if( cmp || p2->bNomatch ) break; 6459 rc = fts5ExprNodeNext(pExpr, p1, 0, 0); 6460 } 6461 pNode->bEof = p1->bEof; 6462 pNode->bNomatch = p1->bNomatch; 6463 pNode->iRowid = p1->iRowid; 6464 if( p1->bEof ){ 6465 fts5ExprNodeZeroPoslist(p2); 6466 } 6467 return rc; 6468 } 6469 6470 static int fts5ExprNodeNext_NOT( 6471 Fts5Expr *pExpr, 6472 Fts5ExprNode *pNode, 6473 int bFromValid, 6474 i64 iFrom 6475 ){ 6476 int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); 6477 if( rc==SQLITE_OK ){ 6478 rc = fts5ExprNodeTest_NOT(pExpr, pNode); 6479 } 6480 if( rc!=SQLITE_OK ){ 6481 pNode->bNomatch = 0; 6482 } 6483 return rc; 6484 } 6485 6486 /* 6487 ** If pNode currently points to a match, this function returns SQLITE_OK 6488 ** without modifying it. Otherwise, pNode is advanced until it does point 6489 ** to a match or EOF is reached. 
6490 */ 6491 static int fts5ExprNodeTest( 6492 Fts5Expr *pExpr, /* Expression of which pNode is a part */ 6493 Fts5ExprNode *pNode /* Expression node to test */ 6494 ){ 6495 int rc = SQLITE_OK; 6496 if( pNode->bEof==0 ){ 6497 switch( pNode->eType ){ 6498 6499 case FTS5_STRING: { 6500 rc = fts5ExprNodeTest_STRING(pExpr, pNode); 6501 break; 6502 } 6503 6504 case FTS5_TERM: { 6505 rc = fts5ExprNodeTest_TERM(pExpr, pNode); 6506 break; 6507 } 6508 6509 case FTS5_AND: { 6510 rc = fts5ExprNodeTest_AND(pExpr, pNode); 6511 break; 6512 } 6513 6514 case FTS5_OR: { 6515 fts5ExprNodeTest_OR(pExpr, pNode); 6516 break; 6517 } 6518 6519 default: assert( pNode->eType==FTS5_NOT ); { 6520 rc = fts5ExprNodeTest_NOT(pExpr, pNode); 6521 break; 6522 } 6523 } 6524 } 6525 return rc; 6526 } 6527 6528 6529 /* 6530 ** Set node pNode, which is part of expression pExpr, to point to the first 6531 ** match. If there are no matches, set the Node.bEof flag to indicate EOF. 6532 ** 6533 ** Return an SQLite error code if an error occurs, or SQLITE_OK otherwise. 6534 ** It is not an error if there are no matches. 6535 */ 6536 static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ 6537 int rc = SQLITE_OK; 6538 pNode->bEof = 0; 6539 pNode->bNomatch = 0; 6540 6541 if( Fts5NodeIsString(pNode) ){ 6542 /* Initialize all term iterators in the NEAR object. 
*/ 6543 rc = fts5ExprNearInitAll(pExpr, pNode); 6544 }else if( pNode->xNext==0 ){ 6545 pNode->bEof = 1; 6546 }else{ 6547 int i; 6548 int nEof = 0; 6549 for(i=0; i<pNode->nChild && rc==SQLITE_OK; i++){ 6550 Fts5ExprNode *pChild = pNode->apChild[i]; 6551 rc = fts5ExprNodeFirst(pExpr, pNode->apChild[i]); 6552 assert( pChild->bEof==0 || pChild->bEof==1 ); 6553 nEof += pChild->bEof; 6554 } 6555 pNode->iRowid = pNode->apChild[0]->iRowid; 6556 /* An AND node is at EOF if any child is; an OR node only if all ** children are; a NOT node follows its first child. */ 6557 switch( pNode->eType ){ 6558 case FTS5_AND: 6559 if( nEof>0 ) fts5ExprSetEof(pNode); 6560 break; 6561 6562 case FTS5_OR: 6563 if( pNode->nChild==nEof ) fts5ExprSetEof(pNode); 6564 break; 6565 6566 default: 6567 assert( pNode->eType==FTS5_NOT ); 6568 pNode->bEof = pNode->apChild[0]->bEof; 6569 break; 6570 } 6571 } 6572 6573 if( rc==SQLITE_OK ){ 6574 rc = fts5ExprNodeTest(pExpr, pNode); 6575 } 6576 return rc; 6577 } 6578 6579 6580 /* 6581 ** Begin iterating through the set of documents in index pIdx matched by 6582 ** the MATCH expression passed as the first argument. If the "bDesc" 6583 ** parameter is passed a non-zero value, iteration is in descending rowid 6584 ** order. Or, if it is zero, in ascending order. 6585 ** 6586 ** If iterating in ascending rowid order (bDesc==0), the first document 6587 ** visited is that with the smallest rowid that is larger than or equal 6588 ** to parameter iFirst. Or, if iterating in descending order (bDesc==1), 6589 ** then the first document visited must have a rowid smaller than or 6590 ** equal to iFirst. 6591 ** 6592 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It 6593 ** is not considered an error if the query does not match any documents. 
6594 */ 6595 static int sqlite3Fts5ExprFirst(Fts5Expr *p, Fts5Index *pIdx, i64 iFirst, int bDesc){ 6596 Fts5ExprNode *pRoot = p->pRoot; 6597 int rc; /* Return code */ 6598 6599 p->pIndex = pIdx; 6600 p->bDesc = bDesc; 6601 rc = fts5ExprNodeFirst(p, pRoot); 6602 6603 /* If not at EOF but the current rowid occurs earlier than iFirst in 6604 ** the iteration order, move to document iFirst or later. */ 6605 if( rc==SQLITE_OK 6606 && 0==pRoot->bEof 6607 && fts5RowidCmp(p, pRoot->iRowid, iFirst)<0 6608 ){ 6609 rc = fts5ExprNodeNext(p, pRoot, 1, iFirst); 6610 } 6611 6612 /* If the iterator is not at a real match, skip forward until it is. */ 6613 while( pRoot->bNomatch && rc==SQLITE_OK ){ 6614 assert( pRoot->bEof==0 ); 6615 rc = fts5ExprNodeNext(p, pRoot, 0, 0); 6616 } 6617 return rc; 6618 } 6619 6620 /* 6621 ** Move to the next document. The root node must be at a real match (neither ** at EOF nor flagged bNomatch) when this is called. If, after advancing, ** the current rowid lies beyond iLast in the iteration order, the root ** node is flagged as being at EOF. 6622 ** 6623 ** Return SQLITE_OK if successful, or an SQLite error code otherwise. It 6624 ** is not considered an error if the query does not match any documents. 6625 */ 6626 static int sqlite3Fts5ExprNext(Fts5Expr *p, i64 iLast){ 6627 int rc; 6628 Fts5ExprNode *pRoot = p->pRoot; 6629 assert( pRoot->bEof==0 && pRoot->bNomatch==0 ); 6630 do { 6631 rc = fts5ExprNodeNext(p, pRoot, 0, 0); 6632 assert( pRoot->bNomatch==0 || (rc==SQLITE_OK && pRoot->bEof==0) ); 6633 }while( pRoot->bNomatch ); 6634 if( fts5RowidCmp(p, pRoot->iRowid, iLast)>0 ){ 6635 pRoot->bEof = 1; 6636 } 6637 return rc; 6638 } 6639 /* Return the bEof flag of the root node of expression p. */ 6640 static int sqlite3Fts5ExprEof(Fts5Expr *p){ 6641 return p->pRoot->bEof; 6642 } 6643 /* Return the iRowid value of the root node of expression p. */ 6644 static i64 sqlite3Fts5ExprRowid(Fts5Expr *p){ 6645 return p->pRoot->iRowid; 6646 } 6647 /* Set (*pz) to the value returned by sqlite3Fts5Strndup() for the pToken->n ** bytes at pToken->p, and return the resulting error code (SQLITE_OK if ** none was reported). */ 6648 static int fts5ParseStringFromToken(Fts5Token *pToken, char **pz){ 6649 int rc = SQLITE_OK; 6650 *pz = sqlite3Fts5Strndup(&rc, pToken->p, pToken->n); 6651 return rc; 6652 } 6653 6654 /* 6655 ** Free the phrase object passed as the only argument. 
6656 */ 6657 static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ 6658 if( pPhrase ){ 6659 int i; 6660 for(i=0; i<pPhrase->nTerm; i++){ 6661 Fts5ExprTerm *pSyn; 6662 Fts5ExprTerm *pNext; 6663 Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; 6664 sqlite3_free(pTerm->zTerm); 6665 sqlite3Fts5IterClose(pTerm->pIter); 6666 for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ 6667 pNext = pSyn->pSynonym; 6668 sqlite3Fts5IterClose(pSyn->pIter); 6669 fts5BufferFree((Fts5Buffer*)&pSyn[1]); 6670 sqlite3_free(pSyn); 6671 } 6672 } 6673 if( pPhrase->poslist.nSpace>0 ) fts5BufferFree(&pPhrase->poslist); 6674 sqlite3_free(pPhrase); 6675 } 6676 } 6677 6678 /* 6679 ** Set the "bFirst" flag on the first token of the phrase passed as the 6680 ** only argument. 6681 */ 6682 static void sqlite3Fts5ParseSetCaret(Fts5ExprPhrase *pPhrase){ 6683 if( pPhrase && pPhrase->nTerm ){ 6684 pPhrase->aTerm[0].bFirst = 1; 6685 } 6686 } 6687 6688 /* 6689 ** If argument pNear is NULL, then a new Fts5ExprNearset object is allocated 6690 ** and populated with pPhrase. Or, if pNear is not NULL, phrase pPhrase is 6691 ** appended to it and the results returned. 6692 ** 6693 ** If an OOM error occurs, both the pNear and pPhrase objects are freed and 6694 ** NULL returned. 
6695 */ 6696 static Fts5ExprNearset *sqlite3Fts5ParseNearset( 6697 Fts5Parse *pParse, /* Parse context */ 6698 Fts5ExprNearset *pNear, /* Existing nearset, or NULL */ 6699 Fts5ExprPhrase *pPhrase /* Recently parsed phrase */ 6700 ){ 6701 const int SZALLOC = 8; 6702 Fts5ExprNearset *pRet = 0; 6703 6704 if( pParse->rc==SQLITE_OK ){ 6705 if( pPhrase==0 ){ 6706 return pNear; 6707 } 6708 if( pNear==0 ){ 6709 sqlite3_int64 nByte; 6710 nByte = sizeof(Fts5ExprNearset) + SZALLOC * sizeof(Fts5ExprPhrase*); 6711 pRet = sqlite3_malloc64(nByte); 6712 if( pRet==0 ){ 6713 pParse->rc = SQLITE_NOMEM; 6714 }else{ 6715 memset(pRet, 0, (size_t)nByte); 6716 } 6717 }else if( (pNear->nPhrase % SZALLOC)==0 ){ 6718 int nNew = pNear->nPhrase + SZALLOC; 6719 sqlite3_int64 nByte; 6720 6721 nByte = sizeof(Fts5ExprNearset) + nNew * sizeof(Fts5ExprPhrase*); 6722 pRet = (Fts5ExprNearset*)sqlite3_realloc64(pNear, nByte); 6723 if( pRet==0 ){ 6724 pParse->rc = SQLITE_NOMEM; 6725 } 6726 }else{ 6727 pRet = pNear; 6728 } 6729 } 6730 6731 if( pRet==0 ){ 6732 assert( pParse->rc!=SQLITE_OK ); 6733 sqlite3Fts5ParseNearsetFree(pNear); 6734 sqlite3Fts5ParsePhraseFree(pPhrase); 6735 }else{ 6736 if( pRet->nPhrase>0 ){ 6737 Fts5ExprPhrase *pLast = pRet->apPhrase[pRet->nPhrase-1]; 6738 assert( pParse!=0 ); 6739 assert( pParse->apPhrase!=0 ); 6740 assert( pParse->nPhrase>=2 ); 6741 assert( pLast==pParse->apPhrase[pParse->nPhrase-2] ); 6742 if( pPhrase->nTerm==0 ){ 6743 fts5ExprPhraseFree(pPhrase); 6744 pRet->nPhrase--; 6745 pParse->nPhrase--; 6746 pPhrase = pLast; 6747 }else if( pLast->nTerm==0 ){ 6748 fts5ExprPhraseFree(pLast); 6749 pParse->apPhrase[pParse->nPhrase-2] = pPhrase; 6750 pParse->nPhrase--; 6751 pRet->nPhrase--; 6752 } 6753 } 6754 pRet->apPhrase[pRet->nPhrase++] = pPhrase; 6755 } 6756 return pRet; 6757 } 6758 6759 typedef struct TokenCtx TokenCtx; 6760 struct TokenCtx { 6761 Fts5ExprPhrase *pPhrase; 6762 int rc; 6763 }; 6764 6765 /* 6766 ** Callback for tokenizing terms used by ParseTerm(). 
6767 */ 6768 static int fts5ParseTokenize( 6769 void *pContext, /* Pointer to TokenCtx object */ 6770 int tflags, /* Mask of FTS5_TOKEN_* flags */ 6771 const char *pToken, /* Buffer containing token */ 6772 int nToken, /* Size of token in bytes */ 6773 int iUnused1, /* Start offset of token */ 6774 int iUnused2 /* End offset of token */ 6775 ){ 6776 int rc = SQLITE_OK; 6777 const int SZALLOC = 8; /* Phrase term array grows in steps of this many */ 6778 TokenCtx *pCtx = (TokenCtx*)pContext; 6779 Fts5ExprPhrase *pPhrase = pCtx->pPhrase; 6780 6781 UNUSED_PARAM2(iUnused1, iUnused2); 6782 6783 /* If an error has already occurred, this is a no-op */ 6784 if( pCtx->rc!=SQLITE_OK ) return pCtx->rc; /* Tokens longer than FTS5_MAX_TOKEN_SIZE bytes are truncated */ 6785 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; 6786 /* A colocated token is linked in as a synonym of the most recently added ** term. A single allocation holds the Fts5ExprTerm, an Fts5Buffer and then ** the term text; the memset() below supplies the nul-terminator. */ 6787 if( pPhrase && pPhrase->nTerm>0 && (tflags & FTS5_TOKEN_COLOCATED) ){ 6788 Fts5ExprTerm *pSyn; 6789 sqlite3_int64 nByte = sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer) + nToken+1; 6790 pSyn = (Fts5ExprTerm*)sqlite3_malloc64(nByte); 6791 if( pSyn==0 ){ 6792 rc = SQLITE_NOMEM; 6793 }else{ 6794 memset(pSyn, 0, (size_t)nByte); 6795 pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); 6796 memcpy(pSyn->zTerm, pToken, nToken); 6797 pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; 6798 pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; 6799 } 6800 }else{ 6801 Fts5ExprTerm *pTerm; /* Grow the phrase whenever the term count is a multiple of SZALLOC ** (this includes creating it when pPhrase is still NULL). */ 6802 if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){ 6803 Fts5ExprPhrase *pNew; 6804 int nNew = SZALLOC + (pPhrase ?
pPhrase->nTerm : 0); 6805 6806 pNew = (Fts5ExprPhrase*)sqlite3_realloc64(pPhrase, 6807 sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * nNew 6808 ); 6809 if( pNew==0 ){ 6810 rc = SQLITE_NOMEM; 6811 }else{ 6812 if( pPhrase==0 ) memset(pNew, 0, sizeof(Fts5ExprPhrase)); 6813 pCtx->pPhrase = pPhrase = pNew; 6814 pNew->nTerm = nNew - SZALLOC; 6815 } 6816 } 6817 6818 if( rc==SQLITE_OK ){ 6819 pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; 6820 memset(pTerm, 0, sizeof(Fts5ExprTerm)); 6821 pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); 6822 } 6823 } 6824 6825 pCtx->rc = rc; 6826 return rc; 6827 } 6828 6829 6830 /* 6831 ** Free the phrase object passed as the only argument. 6832 */ 6833 static void sqlite3Fts5ParsePhraseFree(Fts5ExprPhrase *pPhrase){ 6834 fts5ExprPhraseFree(pPhrase); 6835 } 6836 6837 /* 6838 ** Free the phrase object passed as the second argument. 6839 */ 6840 static void sqlite3Fts5ParseNearsetFree(Fts5ExprNearset *pNear){ 6841 if( pNear ){ 6842 int i; 6843 for(i=0; i<pNear->nPhrase; i++){ 6844 fts5ExprPhraseFree(pNear->apPhrase[i]); 6845 } 6846 sqlite3_free(pNear->pColset); 6847 sqlite3_free(pNear); 6848 } 6849 } 6850 6851 static void sqlite3Fts5ParseFinished(Fts5Parse *pParse, Fts5ExprNode *p){ 6852 assert( pParse->pExpr==0 ); 6853 pParse->pExpr = p; 6854 } 6855 6856 static int parseGrowPhraseArray(Fts5Parse *pParse){ 6857 if( (pParse->nPhrase % 8)==0 ){ 6858 sqlite3_int64 nByte = sizeof(Fts5ExprPhrase*) * (pParse->nPhrase + 8); 6859 Fts5ExprPhrase **apNew; 6860 apNew = (Fts5ExprPhrase**)sqlite3_realloc64(pParse->apPhrase, nByte); 6861 if( apNew==0 ){ 6862 pParse->rc = SQLITE_NOMEM; 6863 return SQLITE_NOMEM; 6864 } 6865 pParse->apPhrase = apNew; 6866 } 6867 return SQLITE_OK; 6868 } 6869 6870 /* 6871 ** This function is called by the parser to process a string token. The 6872 ** string may or may not be quoted. In any case it is tokenized and a 6873 ** phrase object consisting of all tokens returned. 
6874 */ 6875 static Fts5ExprPhrase *sqlite3Fts5ParseTerm( 6876 Fts5Parse *pParse, /* Parse context */ 6877 Fts5ExprPhrase *pAppend, /* Phrase to append to */ 6878 Fts5Token *pToken, /* String to tokenize */ 6879 int bPrefix /* True if there is a trailing "*" */ 6880 ){ 6881 Fts5Config *pConfig = pParse->pConfig; 6882 TokenCtx sCtx; /* Context object passed to callback */ 6883 int rc; /* Tokenize return code */ 6884 char *z = 0; 6885 6886 memset(&sCtx, 0, sizeof(TokenCtx)); 6887 sCtx.pPhrase = pAppend; 6888 6889 rc = fts5ParseStringFromToken(pToken, &z); 6890 if( rc==SQLITE_OK ){ 6891 int flags = FTS5_TOKENIZE_QUERY | (bPrefix ? FTS5_TOKENIZE_PREFIX : 0); 6892 int n; 6893 sqlite3Fts5Dequote(z); 6894 n = (int)strlen(z); 6895 rc = sqlite3Fts5Tokenize(pConfig, flags, z, n, &sCtx, fts5ParseTokenize); 6896 } 6897 sqlite3_free(z); 6898 if( rc || (rc = sCtx.rc) ){ 6899 pParse->rc = rc; 6900 fts5ExprPhraseFree(sCtx.pPhrase); 6901 sCtx.pPhrase = 0; 6902 }else{ 6903 6904 if( pAppend==0 ){ 6905 if( parseGrowPhraseArray(pParse) ){ 6906 fts5ExprPhraseFree(sCtx.pPhrase); 6907 return 0; 6908 } 6909 pParse->nPhrase++; 6910 } 6911 6912 if( sCtx.pPhrase==0 ){ 6913 /* This happens when parsing a token or quoted phrase that contains 6914 ** no token characters at all. (e.g ... MATCH '""'). */ 6915 sCtx.pPhrase = sqlite3Fts5MallocZero(&pParse->rc, sizeof(Fts5ExprPhrase)); 6916 }else if( sCtx.pPhrase->nTerm ){ 6917 sCtx.pPhrase->aTerm[sCtx.pPhrase->nTerm-1].bPrefix = (u8)bPrefix; 6918 } 6919 pParse->apPhrase[pParse->nPhrase-1] = sCtx.pPhrase; 6920 } 6921 6922 return sCtx.pPhrase; 6923 } 6924 6925 /* 6926 ** Create a new FTS5 expression by cloning phrase iPhrase of the 6927 ** expression passed as the second argument. 
6928 */ 6929 static int sqlite3Fts5ExprClonePhrase( 6930 Fts5Expr *pExpr, 6931 int iPhrase, 6932 Fts5Expr **ppNew 6933 ){ 6934 int rc = SQLITE_OK; /* Return code */ 6935 Fts5ExprPhrase *pOrig; /* The phrase extracted from pExpr */ 6936 Fts5Expr *pNew = 0; /* Expression to return via *ppNew */ 6937 TokenCtx sCtx = {0,0}; /* Context object for fts5ParseTokenize */ 6938 6939 pOrig = pExpr->apExprPhrase[iPhrase]; 6940 pNew = (Fts5Expr*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Expr)); 6941 if( rc==SQLITE_OK ){ 6942 pNew->apExprPhrase = (Fts5ExprPhrase**)sqlite3Fts5MallocZero(&rc, 6943 sizeof(Fts5ExprPhrase*)); 6944 } 6945 if( rc==SQLITE_OK ){ 6946 pNew->pRoot = (Fts5ExprNode*)sqlite3Fts5MallocZero(&rc, 6947 sizeof(Fts5ExprNode)); 6948 } 6949 if( rc==SQLITE_OK ){ 6950 pNew->pRoot->pNear = (Fts5ExprNearset*)sqlite3Fts5MallocZero(&rc, 6951 sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)); 6952 } 6953 if( rc==SQLITE_OK ){ 6954 Fts5Colset *pColsetOrig = pOrig->pNode->pNear->pColset; 6955 if( pColsetOrig ){ 6956 sqlite3_int64 nByte; 6957 Fts5Colset *pColset; 6958 nByte = sizeof(Fts5Colset) + (pColsetOrig->nCol-1) * sizeof(int); 6959 pColset = (Fts5Colset*)sqlite3Fts5MallocZero(&rc, nByte); 6960 if( pColset ){ 6961 memcpy(pColset, pColsetOrig, (size_t)nByte); 6962 } 6963 pNew->pRoot->pNear->pColset = pColset; 6964 } 6965 } 6966 6967 if( pOrig->nTerm ){ 6968 int i; /* Used to iterate through phrase terms */ 6969 for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){ 6970 int tflags = 0; 6971 Fts5ExprTerm *p; 6972 for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ 6973 const char *zTerm = p->zTerm; 6974 rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), 6975 0, 0); 6976 tflags = FTS5_TOKEN_COLOCATED; 6977 } 6978 if( rc==SQLITE_OK ){ 6979 sCtx.pPhrase->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; 6980 sCtx.pPhrase->aTerm[i].bFirst = pOrig->aTerm[i].bFirst; 6981 } 6982 } 6983 }else{ 6984 /* This happens when parsing a token or quoted phrase that contains 6985 
** no token characters at all. (e.g ... MATCH '""'). */ 6986 sCtx.pPhrase = sqlite3Fts5MallocZero(&rc, sizeof(Fts5ExprPhrase)); 6987 } 6988 6989 if( rc==SQLITE_OK && ALWAYS(sCtx.pPhrase) ){ 6990 /* All the allocations succeeded. Put the expression object together. */ 6991 pNew->pIndex = pExpr->pIndex; 6992 pNew->pConfig = pExpr->pConfig; 6993 pNew->nPhrase = 1; 6994 pNew->apExprPhrase[0] = sCtx.pPhrase; 6995 pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; 6996 pNew->pRoot->pNear->nPhrase = 1; 6997 sCtx.pPhrase->pNode = pNew->pRoot; 6998 6999 if( pOrig->nTerm==1 7000 && pOrig->aTerm[0].pSynonym==0 7001 && pOrig->aTerm[0].bFirst==0 7002 ){ 7003 pNew->pRoot->eType = FTS5_TERM; 7004 pNew->pRoot->xNext = fts5ExprNodeNext_TERM; 7005 }else{ 7006 pNew->pRoot->eType = FTS5_STRING; 7007 pNew->pRoot->xNext = fts5ExprNodeNext_STRING; 7008 } 7009 }else{ 7010 sqlite3Fts5ExprFree(pNew); 7011 fts5ExprPhraseFree(sCtx.pPhrase); 7012 pNew = 0; 7013 } 7014 7015 *ppNew = pNew; 7016 return rc; 7017 } 7018 7019 7020 /* 7021 ** Token pTok has appeared in a MATCH expression where the NEAR operator 7022 ** is expected. If token pTok does not contain "NEAR", store an error 7023 ** in the pParse object. 
7024 */ 7025 static void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ 7026 if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ 7027 sqlite3Fts5ParseError( 7028 pParse, "fts5: syntax error near \"%.*s\"", pTok->n, pTok->p 7029 ); 7030 } 7031 } 7032 7033 static void sqlite3Fts5ParseSetDistance( 7034 Fts5Parse *pParse, 7035 Fts5ExprNearset *pNear, 7036 Fts5Token *p 7037 ){ 7038 if( pNear ){ 7039 int nNear = 0; 7040 int i; 7041 if( p->n ){ 7042 for(i=0; i<p->n; i++){ 7043 char c = (char)p->p[i]; 7044 if( c<'0' || c>'9' ){ 7045 sqlite3Fts5ParseError( 7046 pParse, "expected integer, got \"%.*s\"", p->n, p->p 7047 ); 7048 return; 7049 } 7050 nNear = nNear * 10 + (p->p[i] - '0'); 7051 } 7052 }else{ 7053 nNear = FTS5_DEFAULT_NEARDIST; 7054 } 7055 pNear->nNear = nNear; 7056 } 7057 } 7058 7059 /* 7060 ** The second argument passed to this function may be NULL, or it may be 7061 ** an existing Fts5Colset object. This function returns a pointer to 7062 ** a new colset object containing the contents of (p) with new value column 7063 ** number iCol appended. 7064 ** 7065 ** If an OOM error occurs, store an error code in pParse and return NULL. 7066 ** The old colset object (if any) is not freed in this case. 7067 */ 7068 static Fts5Colset *fts5ParseColset( 7069 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ 7070 Fts5Colset *p, /* Existing colset object */ 7071 int iCol /* New column to add to colset object */ 7072 ){ 7073 int nCol = p ? p->nCol : 0; /* Num. 
columns already in colset object */ 7074 Fts5Colset *pNew; /* New colset object to return */ 7075 7076 assert( pParse->rc==SQLITE_OK ); 7077 assert( iCol>=0 && iCol<pParse->pConfig->nCol ); 7078 7079 pNew = sqlite3_realloc64(p, sizeof(Fts5Colset) + sizeof(int)*nCol); 7080 if( pNew==0 ){ 7081 pParse->rc = SQLITE_NOMEM; 7082 }else{ 7083 int *aiCol = pNew->aiCol; 7084 int i, j; 7085 for(i=0; i<nCol; i++){ 7086 if( aiCol[i]==iCol ) return pNew; 7087 if( aiCol[i]>iCol ) break; 7088 } 7089 for(j=nCol; j>i; j--){ 7090 aiCol[j] = aiCol[j-1]; 7091 } 7092 aiCol[i] = iCol; 7093 pNew->nCol = nCol+1; 7094 7095 #ifndef NDEBUG 7096 /* Check that the array is in order and contains no duplicate entries. */ 7097 for(i=1; i<pNew->nCol; i++) assert( pNew->aiCol[i]>pNew->aiCol[i-1] ); 7098 #endif 7099 } 7100 7101 return pNew; 7102 } 7103 7104 /* 7105 ** Allocate and return an Fts5Colset object specifying the inverse of 7106 ** the colset passed as the second argument. Free the colset passed 7107 ** as the second argument before returning. 
7108 */ 7109 static Fts5Colset *sqlite3Fts5ParseColsetInvert(Fts5Parse *pParse, Fts5Colset *p){ 7110 Fts5Colset *pRet; 7111 int nCol = pParse->pConfig->nCol; 7112 7113 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(&pParse->rc, 7114 sizeof(Fts5Colset) + sizeof(int)*nCol 7115 ); 7116 if( pRet ){ 7117 int i; 7118 int iOld = 0; 7119 for(i=0; i<nCol; i++){ 7120 if( iOld>=p->nCol || p->aiCol[iOld]!=i ){ 7121 pRet->aiCol[pRet->nCol++] = i; 7122 }else{ 7123 iOld++; 7124 } 7125 } 7126 } 7127 7128 sqlite3_free(p); 7129 return pRet; 7130 } 7131 7132 static Fts5Colset *sqlite3Fts5ParseColset( 7133 Fts5Parse *pParse, /* Store SQLITE_NOMEM here if required */ 7134 Fts5Colset *pColset, /* Existing colset object */ 7135 Fts5Token *p 7136 ){ 7137 Fts5Colset *pRet = 0; 7138 int iCol; 7139 char *z; /* Dequoted copy of token p */ 7140 7141 z = sqlite3Fts5Strndup(&pParse->rc, p->p, p->n); 7142 if( pParse->rc==SQLITE_OK ){ 7143 Fts5Config *pConfig = pParse->pConfig; 7144 sqlite3Fts5Dequote(z); 7145 for(iCol=0; iCol<pConfig->nCol; iCol++){ 7146 if( 0==sqlite3_stricmp(pConfig->azCol[iCol], z) ) break; 7147 } 7148 if( iCol==pConfig->nCol ){ 7149 sqlite3Fts5ParseError(pParse, "no such column: %s", z); 7150 }else{ 7151 pRet = fts5ParseColset(pParse, pColset, iCol); 7152 } 7153 sqlite3_free(z); 7154 } 7155 7156 if( pRet==0 ){ 7157 assert( pParse->rc!=SQLITE_OK ); 7158 sqlite3_free(pColset); 7159 } 7160 7161 return pRet; 7162 } 7163 7164 /* 7165 ** If argument pOrig is NULL, or if (*pRc) is set to anything other than 7166 ** SQLITE_OK when this function is called, NULL is returned. 7167 ** 7168 ** Otherwise, a copy of (*pOrig) is made into memory obtained from 7169 ** sqlite3Fts5MallocZero() and a pointer to it returned. If the allocation 7170 ** fails, (*pRc) is set to SQLITE_NOMEM and NULL is returned. 
7171 */ 7172 static Fts5Colset *fts5CloneColset(int *pRc, Fts5Colset *pOrig){ 7173 Fts5Colset *pRet; 7174 if( pOrig ){ 7175 sqlite3_int64 nByte = sizeof(Fts5Colset) + (pOrig->nCol-1) * sizeof(int); 7176 pRet = (Fts5Colset*)sqlite3Fts5MallocZero(pRc, nByte); 7177 if( pRet ){ 7178 memcpy(pRet, pOrig, (size_t)nByte); 7179 } 7180 }else{ 7181 pRet = 0; 7182 } 7183 return pRet; 7184 } 7185 7186 /* 7187 ** Remove from colset pColset any columns that are not also in colset pMerge. 7188 */ 7189 static void fts5MergeColset(Fts5Colset *pColset, Fts5Colset *pMerge){ 7190 int iIn = 0; /* Next input in pColset */ 7191 int iMerge = 0; /* Next input in pMerge */ 7192 int iOut = 0; /* Next output slot in pColset */ 7193 7194 while( iIn<pColset->nCol && iMerge<pMerge->nCol ){ 7195 int iDiff = pColset->aiCol[iIn] - pMerge->aiCol[iMerge]; 7196 if( iDiff==0 ){ 7197 pColset->aiCol[iOut++] = pMerge->aiCol[iMerge]; 7198 iMerge++; 7199 iIn++; 7200 }else if( iDiff>0 ){ 7201 iMerge++; 7202 }else{ 7203 iIn++; 7204 } 7205 } 7206 pColset->nCol = iOut; 7207 } 7208 7209 /* 7210 ** Recursively apply colset pColset to expression node pNode and all of 7211 ** its decendents. If (*ppFree) is not NULL, it contains a spare copy 7212 ** of pColset. This function may use the spare copy and set (*ppFree) to 7213 ** zero, or it may create copies of pColset using fts5CloneColset(). 
7214 */ 7215 static void fts5ParseSetColset( 7216 Fts5Parse *pParse, 7217 Fts5ExprNode *pNode, 7218 Fts5Colset *pColset, 7219 Fts5Colset **ppFree 7220 ){ 7221 if( pParse->rc==SQLITE_OK ){ 7222 assert( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING 7223 || pNode->eType==FTS5_AND || pNode->eType==FTS5_OR 7224 || pNode->eType==FTS5_NOT || pNode->eType==FTS5_EOF 7225 ); 7226 if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ 7227 Fts5ExprNearset *pNear = pNode->pNear; 7228 if( pNear->pColset ){ 7229 fts5MergeColset(pNear->pColset, pColset); 7230 if( pNear->pColset->nCol==0 ){ 7231 pNode->eType = FTS5_EOF; 7232 pNode->xNext = 0; 7233 } 7234 }else if( *ppFree ){ 7235 pNear->pColset = pColset; 7236 *ppFree = 0; 7237 }else{ 7238 pNear->pColset = fts5CloneColset(&pParse->rc, pColset); 7239 } 7240 }else{ 7241 int i; 7242 assert( pNode->eType!=FTS5_EOF || pNode->nChild==0 ); 7243 for(i=0; i<pNode->nChild; i++){ 7244 fts5ParseSetColset(pParse, pNode->apChild[i], pColset, ppFree); 7245 } 7246 } 7247 } 7248 } 7249 7250 /* 7251 ** Apply colset pColset to expression node pExpr and all of its descendents. 
7252 */ 7253 static void sqlite3Fts5ParseSetColset( 7254 Fts5Parse *pParse, 7255 Fts5ExprNode *pExpr, 7256 Fts5Colset *pColset 7257 ){ 7258 Fts5Colset *pFree = pColset; 7259 if( pParse->pConfig->eDetail==FTS5_DETAIL_NONE ){ 7260 sqlite3Fts5ParseError(pParse, 7261 "fts5: column queries are not supported (detail=none)" 7262 ); 7263 }else{ 7264 fts5ParseSetColset(pParse, pExpr, pColset, &pFree); 7265 } 7266 sqlite3_free(pFree); 7267 } 7268 7269 static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ 7270 switch( pNode->eType ){ 7271 case FTS5_STRING: { 7272 Fts5ExprNearset *pNear = pNode->pNear; 7273 if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 7274 && pNear->apPhrase[0]->aTerm[0].pSynonym==0 7275 && pNear->apPhrase[0]->aTerm[0].bFirst==0 7276 ){ 7277 pNode->eType = FTS5_TERM; 7278 pNode->xNext = fts5ExprNodeNext_TERM; 7279 }else{ 7280 pNode->xNext = fts5ExprNodeNext_STRING; 7281 } 7282 break; 7283 }; 7284 7285 case FTS5_OR: { 7286 pNode->xNext = fts5ExprNodeNext_OR; 7287 break; 7288 }; 7289 7290 case FTS5_AND: { 7291 pNode->xNext = fts5ExprNodeNext_AND; 7292 break; 7293 }; 7294 7295 default: assert( pNode->eType==FTS5_NOT ); { 7296 pNode->xNext = fts5ExprNodeNext_NOT; 7297 break; 7298 }; 7299 } 7300 } 7301 7302 static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ 7303 if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ 7304 int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; 7305 memcpy(&p->apChild[p->nChild], pSub->apChild, nByte); 7306 p->nChild += pSub->nChild; 7307 sqlite3_free(pSub); 7308 }else{ 7309 p->apChild[p->nChild++] = pSub; 7310 } 7311 } 7312 7313 /* 7314 ** This function is used when parsing LIKE or GLOB patterns against 7315 ** trigram indexes that specify either detail=column or detail=none. 
7316 ** It converts a phrase: 7317 ** 7318 ** abc + def + ghi 7319 ** 7320 ** into an AND tree: 7321 ** 7322 ** abc AND def AND ghi 7323 */ 7324 static Fts5ExprNode *fts5ParsePhraseToAnd( 7325 Fts5Parse *pParse, 7326 Fts5ExprNearset *pNear 7327 ){ 7328 int nTerm = pNear->apPhrase[0]->nTerm; 7329 int ii; 7330 int nByte; 7331 Fts5ExprNode *pRet; 7332 7333 assert( pNear->nPhrase==1 ); 7334 assert( pParse->bPhraseToAnd ); 7335 7336 nByte = sizeof(Fts5ExprNode) + nTerm*sizeof(Fts5ExprNode*); 7337 pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); 7338 if( pRet ){ 7339 pRet->eType = FTS5_AND; 7340 pRet->nChild = nTerm; 7341 fts5ExprAssignXNext(pRet); 7342 pParse->nPhrase--; 7343 for(ii=0; ii<nTerm; ii++){ 7344 Fts5ExprPhrase *pPhrase = (Fts5ExprPhrase*)sqlite3Fts5MallocZero( 7345 &pParse->rc, sizeof(Fts5ExprPhrase) 7346 ); 7347 if( pPhrase ){ 7348 if( parseGrowPhraseArray(pParse) ){ 7349 fts5ExprPhraseFree(pPhrase); 7350 }else{ 7351 pParse->apPhrase[pParse->nPhrase++] = pPhrase; 7352 pPhrase->nTerm = 1; 7353 pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup( 7354 &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1 7355 ); 7356 pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 7357 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) 7358 ); 7359 } 7360 } 7361 } 7362 7363 if( pParse->rc ){ 7364 sqlite3Fts5ParseNodeFree(pRet); 7365 pRet = 0; 7366 }else{ 7367 sqlite3Fts5ParseNearsetFree(pNear); 7368 } 7369 } 7370 7371 return pRet; 7372 } 7373 7374 /* 7375 ** Allocate and return a new expression object. If anything goes wrong (i.e. 7376 ** OOM error), leave an error code in pParse and return NULL. 
7377 */ 7378 static Fts5ExprNode *sqlite3Fts5ParseNode( 7379 Fts5Parse *pParse, /* Parse context */ 7380 int eType, /* FTS5_STRING, AND, OR or NOT */ 7381 Fts5ExprNode *pLeft, /* Left hand child expression */ 7382 Fts5ExprNode *pRight, /* Right hand child expression */ 7383 Fts5ExprNearset *pNear /* For STRING expressions, the near cluster */ 7384 ){ 7385 Fts5ExprNode *pRet = 0; 7386 7387 if( pParse->rc==SQLITE_OK ){ 7388 int nChild = 0; /* Number of children of returned node */ 7389 sqlite3_int64 nByte; /* Bytes of space to allocate for this node */ 7390 7391 assert( (eType!=FTS5_STRING && !pNear) 7392 || (eType==FTS5_STRING && !pLeft && !pRight) 7393 ); 7394 if( eType==FTS5_STRING && pNear==0 ) return 0; 7395 if( eType!=FTS5_STRING && pLeft==0 ) return pRight; 7396 if( eType!=FTS5_STRING && pRight==0 ) return pLeft; 7397 7398 if( eType==FTS5_STRING 7399 && pParse->bPhraseToAnd 7400 && pNear->apPhrase[0]->nTerm>1 7401 ){ 7402 pRet = fts5ParsePhraseToAnd(pParse, pNear); 7403 }else{ 7404 if( eType==FTS5_NOT ){ 7405 nChild = 2; 7406 }else if( eType==FTS5_AND || eType==FTS5_OR ){ 7407 nChild = 2; 7408 if( pLeft->eType==eType ) nChild += pLeft->nChild-1; 7409 if( pRight->eType==eType ) nChild += pRight->nChild-1; 7410 } 7411 7412 nByte = sizeof(Fts5ExprNode) + sizeof(Fts5ExprNode*)*(nChild-1); 7413 pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); 7414 7415 if( pRet ){ 7416 pRet->eType = eType; 7417 pRet->pNear = pNear; 7418 fts5ExprAssignXNext(pRet); 7419 if( eType==FTS5_STRING ){ 7420 int iPhrase; 7421 for(iPhrase=0; iPhrase<pNear->nPhrase; iPhrase++){ 7422 pNear->apPhrase[iPhrase]->pNode = pRet; 7423 if( pNear->apPhrase[iPhrase]->nTerm==0 ){ 7424 pRet->xNext = 0; 7425 pRet->eType = FTS5_EOF; 7426 } 7427 } 7428 7429 if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){ 7430 Fts5ExprPhrase *pPhrase = pNear->apPhrase[0]; 7431 if( pNear->nPhrase!=1 7432 || pPhrase->nTerm>1 7433 || (pPhrase->nTerm>0 && pPhrase->aTerm[0].bFirst) 7434 ){ 7435 
sqlite3Fts5ParseError(pParse, 7436 "fts5: %s queries are not supported (detail!=full)", 7437 pNear->nPhrase==1 ? "phrase": "NEAR" 7438 ); 7439 sqlite3_free(pRet); 7440 pRet = 0; 7441 } 7442 } 7443 }else{ 7444 fts5ExprAddChildren(pRet, pLeft); 7445 fts5ExprAddChildren(pRet, pRight); 7446 } 7447 } 7448 } 7449 } 7450 7451 if( pRet==0 ){ 7452 assert( pParse->rc!=SQLITE_OK ); 7453 sqlite3Fts5ParseNodeFree(pLeft); 7454 sqlite3Fts5ParseNodeFree(pRight); 7455 sqlite3Fts5ParseNearsetFree(pNear); 7456 } 7457 return pRet; 7458 } 7459 7460 static Fts5ExprNode *sqlite3Fts5ParseImplicitAnd( 7461 Fts5Parse *pParse, /* Parse context */ 7462 Fts5ExprNode *pLeft, /* Left hand child expression */ 7463 Fts5ExprNode *pRight /* Right hand child expression */ 7464 ){ 7465 Fts5ExprNode *pRet = 0; 7466 Fts5ExprNode *pPrev; 7467 7468 if( pParse->rc ){ 7469 sqlite3Fts5ParseNodeFree(pLeft); 7470 sqlite3Fts5ParseNodeFree(pRight); 7471 }else{ 7472 7473 assert( pLeft->eType==FTS5_STRING 7474 || pLeft->eType==FTS5_TERM 7475 || pLeft->eType==FTS5_EOF 7476 || pLeft->eType==FTS5_AND 7477 ); 7478 assert( pRight->eType==FTS5_STRING 7479 || pRight->eType==FTS5_TERM 7480 || pRight->eType==FTS5_EOF 7481 ); 7482 7483 if( pLeft->eType==FTS5_AND ){ 7484 pPrev = pLeft->apChild[pLeft->nChild-1]; 7485 }else{ 7486 pPrev = pLeft; 7487 } 7488 assert( pPrev->eType==FTS5_STRING 7489 || pPrev->eType==FTS5_TERM 7490 || pPrev->eType==FTS5_EOF 7491 ); 7492 7493 if( pRight->eType==FTS5_EOF ){ 7494 assert( pParse->apPhrase[pParse->nPhrase-1]==pRight->pNear->apPhrase[0] ); 7495 sqlite3Fts5ParseNodeFree(pRight); 7496 pRet = pLeft; 7497 pParse->nPhrase--; 7498 } 7499 else if( pPrev->eType==FTS5_EOF ){ 7500 Fts5ExprPhrase **ap; 7501 7502 if( pPrev==pLeft ){ 7503 pRet = pRight; 7504 }else{ 7505 pLeft->apChild[pLeft->nChild-1] = pRight; 7506 pRet = pLeft; 7507 } 7508 7509 ap = &pParse->apPhrase[pParse->nPhrase-1-pRight->pNear->nPhrase]; 7510 assert( ap[0]==pPrev->pNear->apPhrase[0] ); 7511 memmove(ap, &ap[1], 
sizeof(Fts5ExprPhrase*)*pRight->pNear->nPhrase); 7512 pParse->nPhrase--; 7513 7514 sqlite3Fts5ParseNodeFree(pPrev); 7515 } 7516 else{ 7517 pRet = sqlite3Fts5ParseNode(pParse, FTS5_AND, pLeft, pRight, 0); 7518 } 7519 } 7520 7521 return pRet; 7522 } 7523 7524 #ifdef SQLITE_TEST 7525 static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ 7526 sqlite3_int64 nByte = 0; 7527 Fts5ExprTerm *p; 7528 char *zQuoted; 7529 7530 /* Determine the maximum amount of space required. */ 7531 for(p=pTerm; p; p=p->pSynonym){ 7532 nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2; 7533 } 7534 zQuoted = sqlite3_malloc64(nByte); 7535 7536 if( zQuoted ){ 7537 int i = 0; 7538 for(p=pTerm; p; p=p->pSynonym){ 7539 char *zIn = p->zTerm; 7540 zQuoted[i++] = '"'; 7541 while( *zIn ){ 7542 if( *zIn=='"' ) zQuoted[i++] = '"'; 7543 zQuoted[i++] = *zIn++; 7544 } 7545 zQuoted[i++] = '"'; 7546 if( p->pSynonym ) zQuoted[i++] = '|'; 7547 } 7548 if( pTerm->bPrefix ){ 7549 zQuoted[i++] = ' '; 7550 zQuoted[i++] = '*'; 7551 } 7552 zQuoted[i++] = '\0'; 7553 } 7554 return zQuoted; 7555 } 7556 7557 static char *fts5PrintfAppend(char *zApp, const char *zFmt, ...){ 7558 char *zNew; 7559 va_list ap; 7560 va_start(ap, zFmt); 7561 zNew = sqlite3_vmprintf(zFmt, ap); 7562 va_end(ap); 7563 if( zApp && zNew ){ 7564 char *zNew2 = sqlite3_mprintf("%s%s", zApp, zNew); 7565 sqlite3_free(zNew); 7566 zNew = zNew2; 7567 } 7568 sqlite3_free(zApp); 7569 return zNew; 7570 } 7571 7572 /* 7573 ** Compose a tcl-readable representation of expression pExpr. Return a 7574 ** pointer to a buffer containing that representation. It is the 7575 ** responsibility of the caller to at some point free the buffer using 7576 ** sqlite3_free(). 
7577 */ 7578 static char *fts5ExprPrintTcl( 7579 Fts5Config *pConfig, 7580 const char *zNearsetCmd, 7581 Fts5ExprNode *pExpr 7582 ){ 7583 char *zRet = 0; 7584 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ 7585 Fts5ExprNearset *pNear = pExpr->pNear; 7586 int i; 7587 int iTerm; 7588 7589 zRet = fts5PrintfAppend(zRet, "%s ", zNearsetCmd); 7590 if( zRet==0 ) return 0; 7591 if( pNear->pColset ){ 7592 int *aiCol = pNear->pColset->aiCol; 7593 int nCol = pNear->pColset->nCol; 7594 if( nCol==1 ){ 7595 zRet = fts5PrintfAppend(zRet, "-col %d ", aiCol[0]); 7596 }else{ 7597 zRet = fts5PrintfAppend(zRet, "-col {%d", aiCol[0]); 7598 for(i=1; i<pNear->pColset->nCol; i++){ 7599 zRet = fts5PrintfAppend(zRet, " %d", aiCol[i]); 7600 } 7601 zRet = fts5PrintfAppend(zRet, "} "); 7602 } 7603 if( zRet==0 ) return 0; 7604 } 7605 7606 if( pNear->nPhrase>1 ){ 7607 zRet = fts5PrintfAppend(zRet, "-near %d ", pNear->nNear); 7608 if( zRet==0 ) return 0; 7609 } 7610 7611 zRet = fts5PrintfAppend(zRet, "--"); 7612 if( zRet==0 ) return 0; 7613 7614 for(i=0; i<pNear->nPhrase; i++){ 7615 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; 7616 7617 zRet = fts5PrintfAppend(zRet, " {"); 7618 for(iTerm=0; zRet && iTerm<pPhrase->nTerm; iTerm++){ 7619 char *zTerm = pPhrase->aTerm[iTerm].zTerm; 7620 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); 7621 if( pPhrase->aTerm[iTerm].bPrefix ){ 7622 zRet = fts5PrintfAppend(zRet, "*"); 7623 } 7624 } 7625 7626 if( zRet ) zRet = fts5PrintfAppend(zRet, "}"); 7627 if( zRet==0 ) return 0; 7628 } 7629 7630 }else{ 7631 char const *zOp = 0; 7632 int i; 7633 switch( pExpr->eType ){ 7634 case FTS5_AND: zOp = "AND"; break; 7635 case FTS5_NOT: zOp = "NOT"; break; 7636 default: 7637 assert( pExpr->eType==FTS5_OR ); 7638 zOp = "OR"; 7639 break; 7640 } 7641 7642 zRet = sqlite3_mprintf("%s", zOp); 7643 for(i=0; zRet && i<pExpr->nChild; i++){ 7644 char *z = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->apChild[i]); 7645 if( !z ){ 7646 sqlite3_free(zRet); 7647 
zRet = 0; 7648 }else{ 7649 zRet = fts5PrintfAppend(zRet, " [%z]", z); 7650 } 7651 } 7652 } 7653 7654 return zRet; 7655 } 7656 7657 static char *fts5ExprPrint(Fts5Config *pConfig, Fts5ExprNode *pExpr){ 7658 char *zRet = 0; 7659 if( pExpr->eType==0 ){ 7660 return sqlite3_mprintf("\"\""); 7661 }else 7662 if( pExpr->eType==FTS5_STRING || pExpr->eType==FTS5_TERM ){ 7663 Fts5ExprNearset *pNear = pExpr->pNear; 7664 int i; 7665 int iTerm; 7666 7667 if( pNear->pColset ){ 7668 int ii; 7669 Fts5Colset *pColset = pNear->pColset; 7670 if( pColset->nCol>1 ) zRet = fts5PrintfAppend(zRet, "{"); 7671 for(ii=0; ii<pColset->nCol; ii++){ 7672 zRet = fts5PrintfAppend(zRet, "%s%s", 7673 pConfig->azCol[pColset->aiCol[ii]], ii==pColset->nCol-1 ? "" : " " 7674 ); 7675 } 7676 if( zRet ){ 7677 zRet = fts5PrintfAppend(zRet, "%s : ", pColset->nCol>1 ? "}" : ""); 7678 } 7679 if( zRet==0 ) return 0; 7680 } 7681 7682 if( pNear->nPhrase>1 ){ 7683 zRet = fts5PrintfAppend(zRet, "NEAR("); 7684 if( zRet==0 ) return 0; 7685 } 7686 7687 for(i=0; i<pNear->nPhrase; i++){ 7688 Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; 7689 if( i!=0 ){ 7690 zRet = fts5PrintfAppend(zRet, " "); 7691 if( zRet==0 ) return 0; 7692 } 7693 for(iTerm=0; iTerm<pPhrase->nTerm; iTerm++){ 7694 char *zTerm = fts5ExprTermPrint(&pPhrase->aTerm[iTerm]); 7695 if( zTerm ){ 7696 zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" + ", zTerm); 7697 sqlite3_free(zTerm); 7698 } 7699 if( zTerm==0 || zRet==0 ){ 7700 sqlite3_free(zRet); 7701 return 0; 7702 } 7703 } 7704 } 7705 7706 if( pNear->nPhrase>1 ){ 7707 zRet = fts5PrintfAppend(zRet, ", %d)", pNear->nNear); 7708 if( zRet==0 ) return 0; 7709 } 7710 7711 }else{ 7712 char const *zOp = 0; 7713 int i; 7714 7715 switch( pExpr->eType ){ 7716 case FTS5_AND: zOp = " AND "; break; 7717 case FTS5_NOT: zOp = " NOT "; break; 7718 default: 7719 assert( pExpr->eType==FTS5_OR ); 7720 zOp = " OR "; 7721 break; 7722 } 7723 7724 for(i=0; i<pExpr->nChild; i++){ 7725 char *z = fts5ExprPrint(pConfig, 
pExpr->apChild[i]); 7726 if( z==0 ){ 7727 sqlite3_free(zRet); 7728 zRet = 0; 7729 }else{ 7730 int e = pExpr->apChild[i]->eType; 7731 int b = (e!=FTS5_STRING && e!=FTS5_TERM && e!=FTS5_EOF); 7732 zRet = fts5PrintfAppend(zRet, "%s%s%z%s", 7733 (i==0 ? "" : zOp), 7734 (b?"(":""), z, (b?")":"") 7735 ); 7736 } 7737 if( zRet==0 ) break; 7738 } 7739 } 7740 7741 return zRet; 7742 } 7743 7744 /* 7745 ** The implementation of user-defined scalar functions fts5_expr() (bTcl==0) 7746 ** and fts5_expr_tcl() (bTcl!=0). 7747 */ 7748 static void fts5ExprFunction( 7749 sqlite3_context *pCtx, /* Function call context */ 7750 int nArg, /* Number of args */ 7751 sqlite3_value **apVal, /* Function arguments */ 7752 int bTcl 7753 ){ 7754 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); 7755 sqlite3 *db = sqlite3_context_db_handle(pCtx); 7756 const char *zExpr = 0; 7757 char *zErr = 0; 7758 Fts5Expr *pExpr = 0; 7759 int rc; 7760 int i; 7761 7762 const char **azConfig; /* Array of arguments for Fts5Config */ 7763 const char *zNearsetCmd = "nearset"; 7764 int nConfig; /* Size of azConfig[] */ 7765 Fts5Config *pConfig = 0; 7766 int iArg = 1; 7767 7768 if( nArg<1 ){ 7769 zErr = sqlite3_mprintf("wrong number of arguments to function %s", 7770 bTcl ? "fts5_expr_tcl" : "fts5_expr" 7771 ); 7772 sqlite3_result_error(pCtx, zErr, -1); 7773 sqlite3_free(zErr); 7774 return; 7775 } 7776 7777 if( bTcl && nArg>1 ){ 7778 zNearsetCmd = (const char*)sqlite3_value_text(apVal[1]); 7779 iArg = 2; 7780 } 7781 7782 nConfig = 3 + (nArg-iArg); 7783 azConfig = (const char**)sqlite3_malloc64(sizeof(char*) * nConfig); 7784 if( azConfig==0 ){ 7785 sqlite3_result_error_nomem(pCtx); 7786 return; 7787 } 7788 azConfig[0] = 0; 7789 azConfig[1] = "main"; 7790 azConfig[2] = "tbl"; 7791 for(i=3; iArg<nArg; iArg++){ 7792 const char *z = (const char*)sqlite3_value_text(apVal[iArg]); 7793 azConfig[i++] = (z ? 
z : ""); 7794 } 7795 7796 zExpr = (const char*)sqlite3_value_text(apVal[0]); 7797 if( zExpr==0 ) zExpr = ""; 7798 7799 rc = sqlite3Fts5ConfigParse(pGlobal, db, nConfig, azConfig, &pConfig, &zErr); 7800 if( rc==SQLITE_OK ){ 7801 rc = sqlite3Fts5ExprNew(pConfig, 0, pConfig->nCol, zExpr, &pExpr, &zErr); 7802 } 7803 if( rc==SQLITE_OK ){ 7804 char *zText; 7805 if( pExpr->pRoot->xNext==0 ){ 7806 zText = sqlite3_mprintf(""); 7807 }else if( bTcl ){ 7808 zText = fts5ExprPrintTcl(pConfig, zNearsetCmd, pExpr->pRoot); 7809 }else{ 7810 zText = fts5ExprPrint(pConfig, pExpr->pRoot); 7811 } 7812 if( zText==0 ){ 7813 rc = SQLITE_NOMEM; 7814 }else{ 7815 sqlite3_result_text(pCtx, zText, -1, SQLITE_TRANSIENT); 7816 sqlite3_free(zText); 7817 } 7818 } 7819 7820 if( rc!=SQLITE_OK ){ 7821 if( zErr ){ 7822 sqlite3_result_error(pCtx, zErr, -1); 7823 sqlite3_free(zErr); 7824 }else{ 7825 sqlite3_result_error_code(pCtx, rc); 7826 } 7827 } 7828 sqlite3_free((void *)azConfig); 7829 sqlite3Fts5ConfigFree(pConfig); 7830 sqlite3Fts5ExprFree(pExpr); 7831 } 7832 7833 static void fts5ExprFunctionHr( 7834 sqlite3_context *pCtx, /* Function call context */ 7835 int nArg, /* Number of args */ 7836 sqlite3_value **apVal /* Function arguments */ 7837 ){ 7838 fts5ExprFunction(pCtx, nArg, apVal, 0); 7839 } 7840 static void fts5ExprFunctionTcl( 7841 sqlite3_context *pCtx, /* Function call context */ 7842 int nArg, /* Number of args */ 7843 sqlite3_value **apVal /* Function arguments */ 7844 ){ 7845 fts5ExprFunction(pCtx, nArg, apVal, 1); 7846 } 7847 7848 /* 7849 ** The implementation of an SQLite user-defined-function that accepts a 7850 ** single integer as an argument. If the integer is an alpha-numeric 7851 ** unicode code point, 1 is returned. Otherwise 0. 
7852 */ 7853 static void fts5ExprIsAlnum( 7854 sqlite3_context *pCtx, /* Function call context */ 7855 int nArg, /* Number of args */ 7856 sqlite3_value **apVal /* Function arguments */ 7857 ){ 7858 int iCode; 7859 u8 aArr[32]; 7860 if( nArg!=1 ){ 7861 sqlite3_result_error(pCtx, 7862 "wrong number of arguments to function fts5_isalnum", -1 7863 ); 7864 return; 7865 } 7866 memset(aArr, 0, sizeof(aArr)); 7867 sqlite3Fts5UnicodeCatParse("L*", aArr); 7868 sqlite3Fts5UnicodeCatParse("N*", aArr); 7869 sqlite3Fts5UnicodeCatParse("Co", aArr); 7870 iCode = sqlite3_value_int(apVal[0]); 7871 sqlite3_result_int(pCtx, aArr[sqlite3Fts5UnicodeCategory((u32)iCode)]); 7872 } 7873 7874 static void fts5ExprFold( 7875 sqlite3_context *pCtx, /* Function call context */ 7876 int nArg, /* Number of args */ 7877 sqlite3_value **apVal /* Function arguments */ 7878 ){ 7879 if( nArg!=1 && nArg!=2 ){ 7880 sqlite3_result_error(pCtx, 7881 "wrong number of arguments to function fts5_fold", -1 7882 ); 7883 }else{ 7884 int iCode; 7885 int bRemoveDiacritics = 0; 7886 iCode = sqlite3_value_int(apVal[0]); 7887 if( nArg==2 ) bRemoveDiacritics = sqlite3_value_int(apVal[1]); 7888 sqlite3_result_int(pCtx, sqlite3Fts5UnicodeFold(iCode, bRemoveDiacritics)); 7889 } 7890 } 7891 #endif /* ifdef SQLITE_TEST */ 7892 7893 /* 7894 ** This is called during initialization to register the fts5_expr() scalar 7895 ** UDF with the SQLite handle passed as the only argument. 
7896 */ 7897 static int sqlite3Fts5ExprInit(Fts5Global *pGlobal, sqlite3 *db){ 7898 #ifdef SQLITE_TEST 7899 struct Fts5ExprFunc { 7900 const char *z; 7901 void (*x)(sqlite3_context*,int,sqlite3_value**); 7902 } aFunc[] = { 7903 { "fts5_expr", fts5ExprFunctionHr }, 7904 { "fts5_expr_tcl", fts5ExprFunctionTcl }, 7905 { "fts5_isalnum", fts5ExprIsAlnum }, 7906 { "fts5_fold", fts5ExprFold }, 7907 }; 7908 int i; 7909 int rc = SQLITE_OK; 7910 void *pCtx = (void*)pGlobal; 7911 7912 for(i=0; rc==SQLITE_OK && i<ArraySize(aFunc); i++){ 7913 struct Fts5ExprFunc *p = &aFunc[i]; 7914 rc = sqlite3_create_function(db, p->z, -1, SQLITE_UTF8, pCtx, p->x, 0, 0); 7915 } 7916 #else 7917 int rc = SQLITE_OK; 7918 UNUSED_PARAM2(pGlobal,db); 7919 #endif 7920 7921 /* Avoid warnings indicating that sqlite3Fts5ParserTrace() and 7922 ** sqlite3Fts5ParserFallback() are unused */ 7923 #ifndef NDEBUG 7924 (void)sqlite3Fts5ParserTrace; 7925 #endif 7926 (void)sqlite3Fts5ParserFallback; 7927 7928 return rc; 7929 } 7930 7931 /* 7932 ** Return the number of phrases in expression pExpr. 7933 */ 7934 static int sqlite3Fts5ExprPhraseCount(Fts5Expr *pExpr){ 7935 return (pExpr ? pExpr->nPhrase : 0); 7936 } 7937 7938 /* 7939 ** Return the number of terms in the iPhrase'th phrase in pExpr. 7940 */ 7941 static int sqlite3Fts5ExprPhraseSize(Fts5Expr *pExpr, int iPhrase){ 7942 if( iPhrase<0 || iPhrase>=pExpr->nPhrase ) return 0; 7943 return pExpr->apExprPhrase[iPhrase]->nTerm; 7944 } 7945 7946 /* 7947 ** This function is used to access the current position list for phrase 7948 ** iPhrase. 
7949 */ 7950 static int sqlite3Fts5ExprPoslist(Fts5Expr *pExpr, int iPhrase, const u8 **pa){ 7951 int nRet; 7952 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; 7953 Fts5ExprNode *pNode = pPhrase->pNode; 7954 if( pNode->bEof==0 && pNode->iRowid==pExpr->pRoot->iRowid ){ 7955 *pa = pPhrase->poslist.p; 7956 nRet = pPhrase->poslist.n; 7957 }else{ 7958 *pa = 0; 7959 nRet = 0; 7960 } 7961 return nRet; 7962 } 7963 7964 struct Fts5PoslistPopulator { 7965 Fts5PoslistWriter writer; 7966 int bOk; /* True if ok to populate */ 7967 int bMiss; 7968 }; 7969 7970 /* 7971 ** Clear the position lists associated with all phrases in the expression 7972 ** passed as the first argument. Argument bLive is true if the expression 7973 ** might be pointing to a real entry, otherwise it has just been reset. 7974 ** 7975 ** At present this function is only used for detail=col and detail=none 7976 ** fts5 tables. This implies that all phrases must be at most 1 token 7977 ** in size, as phrase matches are not supported without detail=full. 7978 */ 7979 static Fts5PoslistPopulator *sqlite3Fts5ExprClearPoslists(Fts5Expr *pExpr, int bLive){ 7980 Fts5PoslistPopulator *pRet; 7981 pRet = sqlite3_malloc64(sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); 7982 if( pRet ){ 7983 int i; 7984 memset(pRet, 0, sizeof(Fts5PoslistPopulator)*pExpr->nPhrase); 7985 for(i=0; i<pExpr->nPhrase; i++){ 7986 Fts5Buffer *pBuf = &pExpr->apExprPhrase[i]->poslist; 7987 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; 7988 assert( pExpr->apExprPhrase[i]->nTerm<=1 ); 7989 if( bLive && 7990 (pBuf->n==0 || pNode->iRowid!=pExpr->pRoot->iRowid || pNode->bEof) 7991 ){ 7992 pRet[i].bMiss = 1; 7993 }else{ 7994 pBuf->n = 0; 7995 } 7996 } 7997 } 7998 return pRet; 7999 } 8000 8001 struct Fts5ExprCtx { 8002 Fts5Expr *pExpr; 8003 Fts5PoslistPopulator *aPopulator; 8004 i64 iOff; 8005 }; 8006 typedef struct Fts5ExprCtx Fts5ExprCtx; 8007 8008 /* 8009 ** TODO: Make this more efficient! 
8010 */ 8011 static int fts5ExprColsetTest(Fts5Colset *pColset, int iCol){ 8012 int i; 8013 for(i=0; i<pColset->nCol; i++){ 8014 if( pColset->aiCol[i]==iCol ) return 1; 8015 } 8016 return 0; 8017 } 8018 8019 static int fts5ExprPopulatePoslistsCb( 8020 void *pCtx, /* Copy of 2nd argument to xTokenize() */ 8021 int tflags, /* Mask of FTS5_TOKEN_* flags */ 8022 const char *pToken, /* Pointer to buffer containing token */ 8023 int nToken, /* Size of token in bytes */ 8024 int iUnused1, /* Byte offset of token within input text */ 8025 int iUnused2 /* Byte offset of end of token within input text */ 8026 ){ 8027 Fts5ExprCtx *p = (Fts5ExprCtx*)pCtx; 8028 Fts5Expr *pExpr = p->pExpr; 8029 int i; 8030 8031 UNUSED_PARAM2(iUnused1, iUnused2); 8032 8033 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; 8034 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ) p->iOff++; 8035 for(i=0; i<pExpr->nPhrase; i++){ 8036 Fts5ExprTerm *pTerm; 8037 if( p->aPopulator[i].bOk==0 ) continue; 8038 for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ 8039 int nTerm = (int)strlen(pTerm->zTerm); 8040 if( (nTerm==nToken || (nTerm<nToken && pTerm->bPrefix)) 8041 && memcmp(pTerm->zTerm, pToken, nTerm)==0 8042 ){ 8043 int rc = sqlite3Fts5PoslistWriterAppend( 8044 &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff 8045 ); 8046 if( rc ) return rc; 8047 break; 8048 } 8049 } 8050 } 8051 return SQLITE_OK; 8052 } 8053 8054 static int sqlite3Fts5ExprPopulatePoslists( 8055 Fts5Config *pConfig, 8056 Fts5Expr *pExpr, 8057 Fts5PoslistPopulator *aPopulator, 8058 int iCol, 8059 const char *z, int n 8060 ){ 8061 int i; 8062 Fts5ExprCtx sCtx; 8063 sCtx.pExpr = pExpr; 8064 sCtx.aPopulator = aPopulator; 8065 sCtx.iOff = (((i64)iCol) << 32) - 1; 8066 8067 for(i=0; i<pExpr->nPhrase; i++){ 8068 Fts5ExprNode *pNode = pExpr->apExprPhrase[i]->pNode; 8069 Fts5Colset *pColset = pNode->pNear->pColset; 8070 if( (pColset && 0==fts5ExprColsetTest(pColset, iCol)) 8071 || 
aPopulator[i].bMiss 8072 ){ 8073 aPopulator[i].bOk = 0; 8074 }else{ 8075 aPopulator[i].bOk = 1; 8076 } 8077 } 8078 8079 return sqlite3Fts5Tokenize(pConfig, 8080 FTS5_TOKENIZE_DOCUMENT, z, n, (void*)&sCtx, fts5ExprPopulatePoslistsCb 8081 ); 8082 } 8083 8084 static void fts5ExprClearPoslists(Fts5ExprNode *pNode){ 8085 if( pNode->eType==FTS5_TERM || pNode->eType==FTS5_STRING ){ 8086 pNode->pNear->apPhrase[0]->poslist.n = 0; 8087 }else{ 8088 int i; 8089 for(i=0; i<pNode->nChild; i++){ 8090 fts5ExprClearPoslists(pNode->apChild[i]); 8091 } 8092 } 8093 } 8094 8095 static int fts5ExprCheckPoslists(Fts5ExprNode *pNode, i64 iRowid){ 8096 pNode->iRowid = iRowid; 8097 pNode->bEof = 0; 8098 switch( pNode->eType ){ 8099 case FTS5_TERM: 8100 case FTS5_STRING: 8101 return (pNode->pNear->apPhrase[0]->poslist.n>0); 8102 8103 case FTS5_AND: { 8104 int i; 8105 for(i=0; i<pNode->nChild; i++){ 8106 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid)==0 ){ 8107 fts5ExprClearPoslists(pNode); 8108 return 0; 8109 } 8110 } 8111 break; 8112 } 8113 8114 case FTS5_OR: { 8115 int i; 8116 int bRet = 0; 8117 for(i=0; i<pNode->nChild; i++){ 8118 if( fts5ExprCheckPoslists(pNode->apChild[i], iRowid) ){ 8119 bRet = 1; 8120 } 8121 } 8122 return bRet; 8123 } 8124 8125 default: { 8126 assert( pNode->eType==FTS5_NOT ); 8127 if( 0==fts5ExprCheckPoslists(pNode->apChild[0], iRowid) 8128 || 0!=fts5ExprCheckPoslists(pNode->apChild[1], iRowid) 8129 ){ 8130 fts5ExprClearPoslists(pNode); 8131 return 0; 8132 } 8133 break; 8134 } 8135 } 8136 return 1; 8137 } 8138 8139 static void sqlite3Fts5ExprCheckPoslists(Fts5Expr *pExpr, i64 iRowid){ 8140 fts5ExprCheckPoslists(pExpr->pRoot, iRowid); 8141 } 8142 8143 /* 8144 ** This function is only called for detail=columns tables. 
8145 */ 8146 static int sqlite3Fts5ExprPhraseCollist( 8147 Fts5Expr *pExpr, 8148 int iPhrase, 8149 const u8 **ppCollist, 8150 int *pnCollist 8151 ){ 8152 Fts5ExprPhrase *pPhrase = pExpr->apExprPhrase[iPhrase]; 8153 Fts5ExprNode *pNode = pPhrase->pNode; 8154 int rc = SQLITE_OK; 8155 8156 assert( iPhrase>=0 && iPhrase<pExpr->nPhrase ); 8157 assert( pExpr->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); 8158 8159 if( pNode->bEof==0 8160 && pNode->iRowid==pExpr->pRoot->iRowid 8161 && pPhrase->poslist.n>0 8162 ){ 8163 Fts5ExprTerm *pTerm = &pPhrase->aTerm[0]; 8164 if( pTerm->pSynonym ){ 8165 Fts5Buffer *pBuf = (Fts5Buffer*)&pTerm->pSynonym[1]; 8166 rc = fts5ExprSynonymList( 8167 pTerm, pNode->iRowid, pBuf, (u8**)ppCollist, pnCollist 8168 ); 8169 }else{ 8170 *ppCollist = pPhrase->aTerm[0].pIter->pData; 8171 *pnCollist = pPhrase->aTerm[0].pIter->nData; 8172 } 8173 }else{ 8174 *ppCollist = 0; 8175 *pnCollist = 0; 8176 } 8177 8178 return rc; 8179 } 8180 8181 #line 1 "fts5_hash.c" 8182 /* 8183 ** 2014 August 11 8184 ** 8185 ** The author disclaims copyright to this source code. In place of 8186 ** a legal notice, here is a blessing: 8187 ** 8188 ** May you do good and not evil. 8189 ** May you find forgiveness for yourself and forgive others. 8190 ** May you share freely, never taking more than you give. 8191 ** 8192 ****************************************************************************** 8193 ** 8194 */ 8195 8196 8197 8198 /* #include "fts5Int.h" */ 8199 8200 typedef struct Fts5HashEntry Fts5HashEntry; 8201 8202 /* 8203 ** This file contains the implementation of an in-memory hash table used 8204 ** to accumuluate "term -> doclist" content before it is flused to a level-0 8205 ** segment. 
8206 */ 8207 8208 8209 struct Fts5Hash { 8210 int eDetail; /* Copy of Fts5Config.eDetail */ 8211 int *pnByte; /* Pointer to bytes counter */ 8212 int nEntry; /* Number of entries currently in hash */ 8213 int nSlot; /* Size of aSlot[] array */ 8214 Fts5HashEntry *pScan; /* Current ordered scan item */ 8215 Fts5HashEntry **aSlot; /* Array of hash slots */ 8216 }; 8217 8218 /* 8219 ** Each entry in the hash table is represented by an object of the 8220 ** following type. Each object, its key (a nul-terminated string) and 8221 ** its current data are stored in a single memory allocation. The 8222 ** key immediately follows the object in memory. The position list 8223 ** data immediately follows the key data in memory. 8224 ** 8225 ** The data that follows the key is in a similar, but not identical format 8226 ** to the doclist data stored in the database. It is: 8227 ** 8228 ** * Rowid, as a varint 8229 ** * Position list, without 0x00 terminator. 8230 ** * Size of previous position list and rowid, as a 4 byte 8231 ** big-endian integer. 8232 ** 8233 ** iRowidOff: 8234 ** Offset of last rowid written to data area. Relative to first byte of 8235 ** structure. 8236 ** 8237 ** nData: 8238 ** Bytes of data written since iRowidOff. 8239 */ 8240 struct Fts5HashEntry { 8241 Fts5HashEntry *pHashNext; /* Next hash entry with same hash-key */ 8242 Fts5HashEntry *pScanNext; /* Next entry in sorted order */ 8243 8244 int nAlloc; /* Total size of allocation */ 8245 int iSzPoslist; /* Offset of space for 4-byte poslist size */ 8246 int nData; /* Total bytes of data (incl. 
structure) */ 8247 int nKey; /* Length of key in bytes */ 8248 u8 bDel; /* Set delete-flag @ iSzPoslist */ 8249 u8 bContent; /* Set content-flag (detail=none mode) */ 8250 i16 iCol; /* Column of last value written */ 8251 int iPos; /* Position of last value written */ 8252 i64 iRowid; /* Rowid of last value written */ 8253 }; 8254 8255 /* 8256 ** Eqivalent to: 8257 ** 8258 ** char *fts5EntryKey(Fts5HashEntry *pEntry){ return zKey; } 8259 */ 8260 #define fts5EntryKey(p) ( ((char *)(&(p)[1])) ) 8261 8262 8263 /* 8264 ** Allocate a new hash table. 8265 */ 8266 static int sqlite3Fts5HashNew(Fts5Config *pConfig, Fts5Hash **ppNew, int *pnByte){ 8267 int rc = SQLITE_OK; 8268 Fts5Hash *pNew; 8269 8270 *ppNew = pNew = (Fts5Hash*)sqlite3_malloc(sizeof(Fts5Hash)); 8271 if( pNew==0 ){ 8272 rc = SQLITE_NOMEM; 8273 }else{ 8274 sqlite3_int64 nByte; 8275 memset(pNew, 0, sizeof(Fts5Hash)); 8276 pNew->pnByte = pnByte; 8277 pNew->eDetail = pConfig->eDetail; 8278 8279 pNew->nSlot = 1024; 8280 nByte = sizeof(Fts5HashEntry*) * pNew->nSlot; 8281 pNew->aSlot = (Fts5HashEntry**)sqlite3_malloc64(nByte); 8282 if( pNew->aSlot==0 ){ 8283 sqlite3_free(pNew); 8284 *ppNew = 0; 8285 rc = SQLITE_NOMEM; 8286 }else{ 8287 memset(pNew->aSlot, 0, (size_t)nByte); 8288 } 8289 } 8290 return rc; 8291 } 8292 8293 /* 8294 ** Free a hash table object. 8295 */ 8296 static void sqlite3Fts5HashFree(Fts5Hash *pHash){ 8297 if( pHash ){ 8298 sqlite3Fts5HashClear(pHash); 8299 sqlite3_free(pHash->aSlot); 8300 sqlite3_free(pHash); 8301 } 8302 } 8303 8304 /* 8305 ** Empty (but do not delete) a hash table. 
8306 */ 8307 static void sqlite3Fts5HashClear(Fts5Hash *pHash){ 8308 int i; 8309 for(i=0; i<pHash->nSlot; i++){ 8310 Fts5HashEntry *pNext; 8311 Fts5HashEntry *pSlot; 8312 for(pSlot=pHash->aSlot[i]; pSlot; pSlot=pNext){ 8313 pNext = pSlot->pHashNext; 8314 sqlite3_free(pSlot); 8315 } 8316 } 8317 memset(pHash->aSlot, 0, pHash->nSlot * sizeof(Fts5HashEntry*)); 8318 pHash->nEntry = 0; 8319 } 8320 8321 static unsigned int fts5HashKey(int nSlot, const u8 *p, int n){ 8322 int i; 8323 unsigned int h = 13; 8324 for(i=n-1; i>=0; i--){ 8325 h = (h << 3) ^ h ^ p[i]; 8326 } 8327 return (h % nSlot); 8328 } 8329 8330 static unsigned int fts5HashKey2(int nSlot, u8 b, const u8 *p, int n){ 8331 int i; 8332 unsigned int h = 13; 8333 for(i=n-1; i>=0; i--){ 8334 h = (h << 3) ^ h ^ p[i]; 8335 } 8336 h = (h << 3) ^ h ^ b; 8337 return (h % nSlot); 8338 } 8339 8340 /* 8341 ** Resize the hash table by doubling the number of slots. 8342 */ 8343 static int fts5HashResize(Fts5Hash *pHash){ 8344 int nNew = pHash->nSlot*2; 8345 int i; 8346 Fts5HashEntry **apNew; 8347 Fts5HashEntry **apOld = pHash->aSlot; 8348 8349 apNew = (Fts5HashEntry**)sqlite3_malloc64(nNew*sizeof(Fts5HashEntry*)); 8350 if( !apNew ) return SQLITE_NOMEM; 8351 memset(apNew, 0, nNew*sizeof(Fts5HashEntry*)); 8352 8353 for(i=0; i<pHash->nSlot; i++){ 8354 while( apOld[i] ){ 8355 unsigned int iHash; 8356 Fts5HashEntry *p = apOld[i]; 8357 apOld[i] = p->pHashNext; 8358 iHash = fts5HashKey(nNew, (u8*)fts5EntryKey(p), 8359 (int)strlen(fts5EntryKey(p))); 8360 p->pHashNext = apNew[iHash]; 8361 apNew[iHash] = p; 8362 } 8363 } 8364 8365 sqlite3_free(apOld); 8366 pHash->nSlot = nNew; 8367 pHash->aSlot = apNew; 8368 return SQLITE_OK; 8369 } 8370 8371 static int fts5HashAddPoslistSize( 8372 Fts5Hash *pHash, 8373 Fts5HashEntry *p, 8374 Fts5HashEntry *p2 8375 ){ 8376 int nRet = 0; 8377 if( p->iSzPoslist ){ 8378 u8 *pPtr = p2 ? 
(u8*)p2 : (u8*)p; 8379 int nData = p->nData; 8380 if( pHash->eDetail==FTS5_DETAIL_NONE ){ 8381 assert( nData==p->iSzPoslist ); 8382 if( p->bDel ){ 8383 pPtr[nData++] = 0x00; 8384 if( p->bContent ){ 8385 pPtr[nData++] = 0x00; 8386 } 8387 } 8388 }else{ 8389 int nSz = (nData - p->iSzPoslist - 1); /* Size in bytes */ 8390 int nPos = nSz*2 + p->bDel; /* Value of nPos field */ 8391 8392 assert( p->bDel==0 || p->bDel==1 ); 8393 if( nPos<=127 ){ 8394 pPtr[p->iSzPoslist] = (u8)nPos; 8395 }else{ 8396 int nByte = sqlite3Fts5GetVarintLen((u32)nPos); 8397 memmove(&pPtr[p->iSzPoslist + nByte], &pPtr[p->iSzPoslist + 1], nSz); 8398 sqlite3Fts5PutVarint(&pPtr[p->iSzPoslist], nPos); 8399 nData += (nByte-1); 8400 } 8401 } 8402 8403 nRet = nData - p->nData; 8404 if( p2==0 ){ 8405 p->iSzPoslist = 0; 8406 p->bDel = 0; 8407 p->bContent = 0; 8408 p->nData = nData; 8409 } 8410 } 8411 return nRet; 8412 } 8413 8414 /* 8415 ** Add an entry to the in-memory hash table. The key is the concatenation 8416 ** of bByte and (pToken/nToken). The value is (iRowid/iCol/iPos). 8417 ** 8418 ** (bByte || pToken) -> (iRowid,iCol,iPos) 8419 ** 8420 ** Or, if iCol is negative, then the value is a delete marker. 
8421 */ 8422 static int sqlite3Fts5HashWrite( 8423 Fts5Hash *pHash, 8424 i64 iRowid, /* Rowid for this entry */ 8425 int iCol, /* Column token appears in (-ve -> delete) */ 8426 int iPos, /* Position of token within column */ 8427 char bByte, /* First byte of token */ 8428 const char *pToken, int nToken /* Token to add or remove to or from index */ 8429 ){ 8430 unsigned int iHash; 8431 Fts5HashEntry *p; 8432 u8 *pPtr; 8433 int nIncr = 0; /* Amount to increment (*pHash->pnByte) by */ 8434 int bNew; /* If non-delete entry should be written */ 8435 8436 bNew = (pHash->eDetail==FTS5_DETAIL_FULL); 8437 8438 /* Attempt to locate an existing hash entry */ 8439 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); 8440 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ 8441 char *zKey = fts5EntryKey(p); 8442 if( zKey[0]==bByte 8443 && p->nKey==nToken 8444 && memcmp(&zKey[1], pToken, nToken)==0 8445 ){ 8446 break; 8447 } 8448 } 8449 8450 /* If an existing hash entry cannot be found, create a new one. */ 8451 if( p==0 ){ 8452 /* Figure out how much space to allocate */ 8453 char *zKey; 8454 sqlite3_int64 nByte = sizeof(Fts5HashEntry) + (nToken+1) + 1 + 64; 8455 if( nByte<128 ) nByte = 128; 8456 8457 /* Grow the Fts5Hash.aSlot[] array if necessary. */ 8458 if( (pHash->nEntry*2)>=pHash->nSlot ){ 8459 int rc = fts5HashResize(pHash); 8460 if( rc!=SQLITE_OK ) return rc; 8461 iHash = fts5HashKey2(pHash->nSlot, (u8)bByte, (const u8*)pToken, nToken); 8462 } 8463 8464 /* Allocate new Fts5HashEntry and add it to the hash table. 
*/ 8465 p = (Fts5HashEntry*)sqlite3_malloc64(nByte); 8466 if( !p ) return SQLITE_NOMEM; 8467 memset(p, 0, sizeof(Fts5HashEntry)); 8468 p->nAlloc = (int)nByte; 8469 zKey = fts5EntryKey(p); 8470 zKey[0] = bByte; 8471 memcpy(&zKey[1], pToken, nToken); 8472 assert( iHash==fts5HashKey(pHash->nSlot, (u8*)zKey, nToken+1) ); 8473 p->nKey = nToken; 8474 zKey[nToken+1] = '\0'; 8475 p->nData = nToken+1 + 1 + sizeof(Fts5HashEntry); 8476 p->pHashNext = pHash->aSlot[iHash]; 8477 pHash->aSlot[iHash] = p; 8478 pHash->nEntry++; 8479 8480 /* Add the first rowid field to the hash-entry */ 8481 p->nData += sqlite3Fts5PutVarint(&((u8*)p)[p->nData], iRowid); 8482 p->iRowid = iRowid; 8483 8484 p->iSzPoslist = p->nData; 8485 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ 8486 p->nData += 1; 8487 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); 8488 } 8489 8490 }else{ 8491 8492 /* Appending to an existing hash-entry. Check that there is enough 8493 ** space to append the largest possible new entry. Worst case scenario 8494 ** is: 8495 ** 8496 ** + 9 bytes for a new rowid, 8497 ** + 4 byte reserved for the "poslist size" varint. 8498 ** + 1 byte for a "new column" byte, 8499 ** + 3 bytes for a new column number (16-bit max) as a varint, 8500 ** + 5 bytes for the new position offset (32-bit max). 8501 */ 8502 if( (p->nAlloc - p->nData) < (9 + 4 + 1 + 3 + 5) ){ 8503 sqlite3_int64 nNew = p->nAlloc * 2; 8504 Fts5HashEntry *pNew; 8505 Fts5HashEntry **pp; 8506 pNew = (Fts5HashEntry*)sqlite3_realloc64(p, nNew); 8507 if( pNew==0 ) return SQLITE_NOMEM; 8508 pNew->nAlloc = (int)nNew; 8509 for(pp=&pHash->aSlot[iHash]; *pp!=p; pp=&(*pp)->pHashNext); 8510 *pp = pNew; 8511 p = pNew; 8512 } 8513 nIncr -= p->nData; 8514 } 8515 assert( (p->nAlloc - p->nData) >= (9 + 4 + 1 + 3 + 5) ); 8516 8517 pPtr = (u8*)p; 8518 8519 /* If this is a new rowid, append the 4-byte size field for the previous 8520 ** entry, and the new rowid for this entry. 
*/ 8521 if( iRowid!=p->iRowid ){ 8522 u64 iDiff = (u64)iRowid - (u64)p->iRowid; 8523 fts5HashAddPoslistSize(pHash, p, 0); 8524 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iDiff); 8525 p->iRowid = iRowid; 8526 bNew = 1; 8527 p->iSzPoslist = p->nData; 8528 if( pHash->eDetail!=FTS5_DETAIL_NONE ){ 8529 p->nData += 1; 8530 p->iCol = (pHash->eDetail==FTS5_DETAIL_FULL ? 0 : -1); 8531 p->iPos = 0; 8532 } 8533 } 8534 8535 if( iCol>=0 ){ 8536 if( pHash->eDetail==FTS5_DETAIL_NONE ){ 8537 p->bContent = 1; 8538 }else{ 8539 /* Append a new column value, if necessary */ 8540 assert_nc( iCol>=p->iCol ); 8541 if( iCol!=p->iCol ){ 8542 if( pHash->eDetail==FTS5_DETAIL_FULL ){ 8543 pPtr[p->nData++] = 0x01; 8544 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iCol); 8545 p->iCol = (i16)iCol; 8546 p->iPos = 0; 8547 }else{ 8548 bNew = 1; 8549 p->iCol = (i16)(iPos = iCol); 8550 } 8551 } 8552 8553 /* Append the new position offset, if necessary */ 8554 if( bNew ){ 8555 p->nData += sqlite3Fts5PutVarint(&pPtr[p->nData], iPos - p->iPos + 2); 8556 p->iPos = iPos; 8557 } 8558 } 8559 }else{ 8560 /* This is a delete. Set the delete flag. */ 8561 p->bDel = 1; 8562 } 8563 8564 nIncr += p->nData; 8565 *pHash->pnByte += nIncr; 8566 return SQLITE_OK; 8567 } 8568 8569 8570 /* 8571 ** Arguments pLeft and pRight point to linked-lists of hash-entry objects, 8572 ** each sorted in key order. This function merges the two lists into a 8573 ** single list and returns a pointer to its first element. 
8574 */ 8575 static Fts5HashEntry *fts5HashEntryMerge( 8576 Fts5HashEntry *pLeft, 8577 Fts5HashEntry *pRight 8578 ){ 8579 Fts5HashEntry *p1 = pLeft; 8580 Fts5HashEntry *p2 = pRight; 8581 Fts5HashEntry *pRet = 0; 8582 Fts5HashEntry **ppOut = &pRet; 8583 8584 while( p1 || p2 ){ 8585 if( p1==0 ){ 8586 *ppOut = p2; 8587 p2 = 0; 8588 }else if( p2==0 ){ 8589 *ppOut = p1; 8590 p1 = 0; 8591 }else{ 8592 int i = 0; 8593 char *zKey1 = fts5EntryKey(p1); 8594 char *zKey2 = fts5EntryKey(p2); 8595 while( zKey1[i]==zKey2[i] ) i++; 8596 8597 if( ((u8)zKey1[i])>((u8)zKey2[i]) ){ 8598 /* p2 is smaller */ 8599 *ppOut = p2; 8600 ppOut = &p2->pScanNext; 8601 p2 = p2->pScanNext; 8602 }else{ 8603 /* p1 is smaller */ 8604 *ppOut = p1; 8605 ppOut = &p1->pScanNext; 8606 p1 = p1->pScanNext; 8607 } 8608 *ppOut = 0; 8609 } 8610 } 8611 8612 return pRet; 8613 } 8614 8615 /* 8616 ** Extract all tokens from hash table iHash and link them into a list 8617 ** in sorted order. The hash table is cleared before returning. It is 8618 ** the responsibility of the caller to free the elements of the returned 8619 ** list. 
8620 */ 8621 static int fts5HashEntrySort( 8622 Fts5Hash *pHash, 8623 const char *pTerm, int nTerm, /* Query prefix, if any */ 8624 Fts5HashEntry **ppSorted 8625 ){ 8626 const int nMergeSlot = 32; 8627 Fts5HashEntry **ap; 8628 Fts5HashEntry *pList; 8629 int iSlot; 8630 int i; 8631 8632 *ppSorted = 0; 8633 ap = sqlite3_malloc64(sizeof(Fts5HashEntry*) * nMergeSlot); 8634 if( !ap ) return SQLITE_NOMEM; 8635 memset(ap, 0, sizeof(Fts5HashEntry*) * nMergeSlot); 8636 8637 for(iSlot=0; iSlot<pHash->nSlot; iSlot++){ 8638 Fts5HashEntry *pIter; 8639 for(pIter=pHash->aSlot[iSlot]; pIter; pIter=pIter->pHashNext){ 8640 if( pTerm==0 8641 || (pIter->nKey+1>=nTerm && 0==memcmp(fts5EntryKey(pIter), pTerm, nTerm)) 8642 ){ 8643 Fts5HashEntry *pEntry = pIter; 8644 pEntry->pScanNext = 0; 8645 for(i=0; ap[i]; i++){ 8646 pEntry = fts5HashEntryMerge(pEntry, ap[i]); 8647 ap[i] = 0; 8648 } 8649 ap[i] = pEntry; 8650 } 8651 } 8652 } 8653 8654 pList = 0; 8655 for(i=0; i<nMergeSlot; i++){ 8656 pList = fts5HashEntryMerge(pList, ap[i]); 8657 } 8658 8659 pHash->nEntry = 0; 8660 sqlite3_free(ap); 8661 *ppSorted = pList; 8662 return SQLITE_OK; 8663 } 8664 8665 /* 8666 ** Query the hash table for a doclist associated with term pTerm/nTerm. 
8667 */ 8668 static int sqlite3Fts5HashQuery( 8669 Fts5Hash *pHash, /* Hash table to query */ 8670 int nPre, 8671 const char *pTerm, int nTerm, /* Query term */ 8672 void **ppOut, /* OUT: Pointer to new object */ 8673 int *pnDoclist /* OUT: Size of doclist in bytes */ 8674 ){ 8675 unsigned int iHash = fts5HashKey(pHash->nSlot, (const u8*)pTerm, nTerm); 8676 char *zKey = 0; 8677 Fts5HashEntry *p; 8678 8679 for(p=pHash->aSlot[iHash]; p; p=p->pHashNext){ 8680 zKey = fts5EntryKey(p); 8681 assert( p->nKey+1==(int)strlen(zKey) ); 8682 if( nTerm==p->nKey+1 && memcmp(zKey, pTerm, nTerm)==0 ) break; 8683 } 8684 8685 if( p ){ 8686 int nHashPre = sizeof(Fts5HashEntry) + nTerm + 1; 8687 int nList = p->nData - nHashPre; 8688 u8 *pRet = (u8*)(*ppOut = sqlite3_malloc64(nPre + nList + 10)); 8689 if( pRet ){ 8690 Fts5HashEntry *pFaux = (Fts5HashEntry*)&pRet[nPre-nHashPre]; 8691 memcpy(&pRet[nPre], &((u8*)p)[nHashPre], nList); 8692 nList += fts5HashAddPoslistSize(pHash, p, pFaux); 8693 *pnDoclist = nList; 8694 }else{ 8695 *pnDoclist = 0; 8696 return SQLITE_NOMEM; 8697 } 8698 }else{ 8699 *ppOut = 0; 8700 *pnDoclist = 0; 8701 } 8702 8703 return SQLITE_OK; 8704 } 8705 8706 static int sqlite3Fts5HashScanInit( 8707 Fts5Hash *p, /* Hash table to query */ 8708 const char *pTerm, int nTerm /* Query prefix */ 8709 ){ 8710 return fts5HashEntrySort(p, pTerm, nTerm, &p->pScan); 8711 } 8712 8713 static void sqlite3Fts5HashScanNext(Fts5Hash *p){ 8714 assert( !sqlite3Fts5HashScanEof(p) ); 8715 p->pScan = p->pScan->pScanNext; 8716 } 8717 8718 static int sqlite3Fts5HashScanEof(Fts5Hash *p){ 8719 return (p->pScan==0); 8720 } 8721 8722 static void sqlite3Fts5HashScanEntry( 8723 Fts5Hash *pHash, 8724 const char **pzTerm, /* OUT: term (nul-terminated) */ 8725 const u8 **ppDoclist, /* OUT: pointer to doclist */ 8726 int *pnDoclist /* OUT: size of doclist in bytes */ 8727 ){ 8728 Fts5HashEntry *p; 8729 if( (p = pHash->pScan) ){ 8730 char *zKey = fts5EntryKey(p); 8731 int nTerm = (int)strlen(zKey); 8732 
fts5HashAddPoslistSize(pHash, p, 0); 8733 *pzTerm = zKey; 8734 *ppDoclist = (const u8*)&zKey[nTerm+1]; 8735 *pnDoclist = p->nData - (sizeof(Fts5HashEntry) + nTerm + 1); 8736 }else{ 8737 *pzTerm = 0; 8738 *ppDoclist = 0; 8739 *pnDoclist = 0; 8740 } 8741 } 8742 8743 #line 1 "fts5_index.c" 8744 /* 8745 ** 2014 May 31 8746 ** 8747 ** The author disclaims copyright to this source code. In place of 8748 ** a legal notice, here is a blessing: 8749 ** 8750 ** May you do good and not evil. 8751 ** May you find forgiveness for yourself and forgive others. 8752 ** May you share freely, never taking more than you give. 8753 ** 8754 ****************************************************************************** 8755 ** 8756 ** Low level access to the FTS index stored in the database file. The 8757 ** routines in this file file implement all read and write access to the 8758 ** %_data table. Other parts of the system access this functionality via 8759 ** the interface defined in fts5Int.h. 8760 */ 8761 8762 8763 /* #include "fts5Int.h" */ 8764 8765 /* 8766 ** Overview: 8767 ** 8768 ** The %_data table contains all the FTS indexes for an FTS5 virtual table. 8769 ** As well as the main term index, there may be up to 31 prefix indexes. 8770 ** The format is similar to FTS3/4, except that: 8771 ** 8772 ** * all segment b-tree leaf data is stored in fixed size page records 8773 ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is 8774 ** taken to ensure it is possible to iterate in either direction through 8775 ** the entries in a doclist, or to seek to a specific entry within a 8776 ** doclist, without loading it into memory. 8777 ** 8778 ** * large doclists that span many pages have associated "doclist index" 8779 ** records that contain a copy of the first rowid on each page spanned by 8780 ** the doclist. This is used to speed up seek operations, and merges of 8781 ** large doclists with very small doclists. 
8782 ** 8783 ** * extra fields in the "structure record" record the state of ongoing 8784 ** incremental merge operations. 8785 ** 8786 */ 8787 8788 8789 #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ 8790 #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ 8791 8792 #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ 8793 8794 #define FTS5_MAIN_PREFIX '0' 8795 8796 #if FTS5_MAX_PREFIX_INDEXES > 31 8797 # error "FTS5_MAX_PREFIX_INDEXES is too large" 8798 #endif 8799 8800 /* 8801 ** Details: 8802 ** 8803 ** The %_data table managed by this module, 8804 ** 8805 ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); 8806 ** 8807 ** , contains the following 5 types of records. See the comments surrounding 8808 ** the FTS5_*_ROWID macros below for a description of how %_data rowids are 8809 ** assigned to each of them. 8810 ** 8811 ** 1. Structure Records: 8812 ** 8813 ** The set of segments that make up an index - the index structure - are 8814 ** recorded in a single record within the %_data table. The record consists 8815 ** of a single 32-bit configuration cookie value followed by a list of 8816 ** SQLite varints. If the FTS table features more than one index (because 8817 ** there are one or more prefix indexes), it is guaranteed that all share 8818 ** the same cookie value. 8819 ** 8820 ** Immediately following the configuration cookie, the record begins with 8821 ** three varints: 8822 ** 8823 ** + number of levels, 8824 ** + total number of segments on all levels, 8825 ** + value of write counter. 8826 ** 8827 ** Then, for each level from 0 to nMax: 8828 ** 8829 ** + number of input segments in ongoing merge. 8830 ** + total number of segments in level. 8831 ** + for each segment from oldest to newest: 8832 ** + segment id (always > 0) 8833 ** + first leaf page number (often 1, always greater than 0) 8834 ** + final leaf page number 8835 ** 8836 ** 2.
The Averages Record: 8837 ** 8838 ** A single record within the %_data table. The data is a list of varints. 8839 ** The first value is the number of rows in the index. Then, for each column 8840 ** from left to right, the total number of tokens in the column for all 8841 ** rows of the table. 8842 ** 8843 ** 3. Segment leaves: 8844 ** 8845 ** TERM/DOCLIST FORMAT: 8846 ** 8847 ** Most of each segment leaf is taken up by term/doclist data. The 8848 ** general format of term/doclist, starting with the first term 8849 ** on the leaf page, is: 8850 ** 8851 ** varint : size of first term 8852 ** blob: first term data 8853 ** doclist: first doclist 8854 ** zero-or-more { 8855 ** varint: number of bytes in common with previous term 8856 ** varint: number of bytes of new term data (nNew) 8857 ** blob: nNew bytes of new term data 8858 ** doclist: next doclist 8859 ** } 8860 ** 8861 ** doclist format: 8862 ** 8863 ** varint: first rowid 8864 ** poslist: first poslist 8865 ** zero-or-more { 8866 ** varint: rowid delta (always > 0) 8867 ** poslist: next poslist 8868 ** } 8869 ** 8870 ** poslist format: 8871 ** 8872 ** varint: size of poslist in bytes multiplied by 2, not including 8873 ** this field. Plus 1 if this entry carries the "delete" flag. 8874 ** collist: collist for column 0 8875 ** zero-or-more { 8876 ** 0x01 byte 8877 ** varint: column number (I) 8878 ** collist: collist for column I 8879 ** } 8880 ** 8881 ** collist format: 8882 ** 8883 ** varint: first offset + 2 8884 ** zero-or-more { 8885 ** varint: offset delta + 2 8886 ** } 8887 ** 8888 ** PAGE FORMAT 8889 ** 8890 ** Each leaf page begins with a 4-byte header containing 2 16-bit 8891 ** unsigned integer fields in big-endian format. They are: 8892 ** 8893 ** * The byte offset of the first rowid on the page, if it exists 8894 ** and occurs before the first term (otherwise 0). 8895 ** 8896 ** * The byte offset of the start of the page footer. 
If the page 8897 ** footer is 0 bytes in size, then this field is the same as the 8898 ** size of the leaf page in bytes. 8899 ** 8900 ** The page footer consists of a single varint for each term located 8901 ** on the page. Each varint is the byte offset of the current term 8902 ** within the page, delta-compressed against the previous value. In 8903 ** other words, the first varint in the footer is the byte offset of 8904 ** the first term, the second is the byte offset of the second less that 8905 ** of the first, and so on. 8906 ** 8907 ** The term/doclist format described above is accurate if the entire 8908 ** term/doclist data fits on a single leaf page. If this is not the case, 8909 ** the format is changed in two ways: 8910 ** 8911 ** + if the first rowid on a page occurs before the first term, it 8912 ** is stored as a literal value: 8913 ** 8914 ** varint: first rowid 8915 ** 8916 ** + the first term on each page is stored in the same way as the 8917 ** very first term of the segment: 8918 ** 8919 ** varint : size of first term 8920 ** blob: first term data 8921 ** 8922 ** 5. Segment doclist indexes: 8923 ** 8924 ** Doclist indexes are themselves b-trees, however they usually consist of 8925 ** a single leaf record only. The format of each doclist index leaf page 8926 ** is: 8927 ** 8928 ** * Flags byte. Bits are: 8929 ** 0x01: Clear if leaf is also the root page, otherwise set. 8930 ** 8931 ** * Page number of fts index leaf page. As a varint. 8932 ** 8933 ** * First rowid on page indicated by previous field. As a varint. 8934 ** 8935 ** * A list of varints, one for each subsequent termless page. A 8936 ** positive delta if the termless page contains at least one rowid, 8937 ** or an 0x00 byte otherwise. 8938 ** 8939 ** Internal doclist index nodes are: 8940 ** 8941 ** * Flags byte. Bits are: 8942 ** 0x01: Clear for root page, otherwise set. 8943 ** 8944 ** * Page number of first child page. As a varint. 
8945 ** 8946 ** * Copy of first rowid on page indicated by previous field. As a varint. 8947 ** 8948 ** * A list of delta-encoded varints - the first rowid on each subsequent 8949 ** child page. 8950 ** 8951 */ 8952 8953 /* 8954 ** Rowids for the averages and structure records in the %_data table. 8955 */ 8956 #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ 8957 #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ 8958 8959 /* 8960 ** Macros determining the rowids used by segment leaves and dlidx leaves 8961 ** and nodes. All nodes and leaves are stored in the %_data table with large 8962 ** positive rowids. 8963 ** 8964 ** Each segment has a unique non-zero 16-bit id. 8965 ** 8966 ** The rowid for each segment leaf is found by passing the segment id and 8967 ** the leaf page number to the FTS5_SEGMENT_ROWID macro. Leaves are numbered 8968 ** sequentially starting from 1. 8969 */ 8970 #define FTS5_DATA_ID_B 16 /* Max seg id number 65535 */ 8971 #define FTS5_DATA_DLI_B 1 /* Doclist-index flag (1 bit) */ 8972 #define FTS5_DATA_HEIGHT_B 5 /* Max dlidx tree height of 32 */ 8973 #define FTS5_DATA_PAGE_B 31 /* Max page number of 2147483648 */ 8974 8975 #define fts5_dri(segid, dlidx, height, pgno) ( \ 8976 ((i64)(segid) << (FTS5_DATA_PAGE_B+FTS5_DATA_HEIGHT_B+FTS5_DATA_DLI_B)) + \ 8977 ((i64)(dlidx) << (FTS5_DATA_PAGE_B + FTS5_DATA_HEIGHT_B)) + \ 8978 ((i64)(height) << (FTS5_DATA_PAGE_B)) + \ 8979 ((i64)(pgno)) \ 8980 ) 8981 8982 #define FTS5_SEGMENT_ROWID(segid, pgno) fts5_dri(segid, 0, 0, pgno) 8983 #define FTS5_DLIDX_ROWID(segid, height, pgno) fts5_dri(segid, 1, height, pgno) 8984 8985 #ifdef SQLITE_DEBUG 8986 static int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } 8987 #endif 8988 8989 8990 /* 8991 ** Each time a blob is read from the %_data table, it is padded with this 8992 ** many zero bytes. This makes it easier to decode the various record formats 8993 ** without overreading if the records are corrupt. 
*/
#define FTS5_DATA_ZERO_PADDING  8
/* Number of spare bytes fts5DataRead() allocates after each record's data. */
#define FTS5_DATA_PADDING       20

typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
typedef struct Fts5DlidxWriter Fts5DlidxWriter;
typedef struct Fts5Iter Fts5Iter;
typedef struct Fts5PageWriter Fts5PageWriter;
typedef struct Fts5SegIter Fts5SegIter;
typedef struct Fts5DoclistIter Fts5DoclistIter;
typedef struct Fts5SegWriter Fts5SegWriter;
typedef struct Fts5Structure Fts5Structure;
typedef struct Fts5StructureLevel Fts5StructureLevel;
typedef struct Fts5StructureSegment Fts5StructureSegment;

/*
** A record read from the %_data table by fts5DataRead(). The header and
** the record bytes live in a single allocation (p points just past the
** header), so the whole object is released with one fts5DataRelease().
*/
struct Fts5Data {
  u8 *p;                          /* Pointer to buffer containing record */
  int nn;                         /* Size of record in bytes */
  int szLeaf;                     /* Size of leaf without page-index */
};

/*
** One object per %_data table.
*/
struct Fts5Index {
  Fts5Config *pConfig;            /* Virtual table configuration */
  char *zDataTbl;                 /* Name of %_data table */
  int nWorkUnit;                  /* Leaf pages in a "unit" of work */

  /*
  ** Variables related to the accumulation of tokens and doclists within the
  ** in-memory hash tables before they are flushed to disk.
  */
  Fts5Hash *pHash;                /* Hash table for in-memory data */
  int nPendingData;               /* Current bytes of pending data */
  i64 iWriteRowid;                /* Rowid for current doc being written */
  int bDelete;                    /* Current write is a delete */

  /* Error state. Most functions in this file are no-ops once this is set
  ** to something other than SQLITE_OK. */
  int rc;                         /* Current error code */

  /* State used by the fts5DataXXX() functions. */
  sqlite3_blob *pReader;          /* RO incr-blob open on %_data table */
  sqlite3_stmt *pWriter;          /* "INSERT ... %_data VALUES(?,?)" */
  sqlite3_stmt *pDeleter;         /* "DELETE FROM %_data ... id>=? AND id<=?" */
  sqlite3_stmt *pIdxWriter;       /* "INSERT ... %_idx VALUES(?,?,?,?)" */
  sqlite3_stmt *pIdxDeleter;      /* "DELETE FROM %_idx WHERE segid=?" */
  sqlite3_stmt *pIdxSelect;       /* NOTE(review): presumably a SELECT on
                                  ** %_idx; prepared outside this section */
  int nRead;                      /* Total number of blocks read */

  sqlite3_stmt *pDataVersion;     /* "PRAGMA %Q.data_version" */
  i64 iStructVersion;             /* data_version when pStruct read */
  Fts5Structure *pStruct;         /* Current db structure (or NULL) */
};

/*
** Iterator over a doclist. NOTE(review): nSize is not used anywhere in
** this section; confirm its meaning against the code that walks the
** doclist.
*/
struct Fts5DoclistIter {
  u8 *aEof;                       /* Pointer to 1 byte past end of doclist */

  /* Output variables. aPoslist==0 at EOF */
  i64 iRowid;
  u8 *aPoslist;
  int nPoslist;
  int nSize;
};

/*
** The contents of the "structure" record for each index are represented
** using an Fts5Structure record in memory. Which uses instances of the
** other Fts5StructureXXX types as components.
**
** Fts5Structure.aLevel[] is declared with one element but is used as a
** variable-length array: objects are allocated with room for nLevel
** entries. Each level's aSeg[] array is a separate allocation.
*/
struct Fts5StructureSegment {
  int iSegid;                     /* Segment id */
  int pgnoFirst;                  /* First leaf page number in segment */
  int pgnoLast;                   /* Last leaf page number in segment */
};
struct Fts5StructureLevel {
  int nMerge;                     /* Number of segments in incr-merge */
  int nSeg;                       /* Total number of segments on level */
  Fts5StructureSegment *aSeg;     /* Array of segments. aSeg[0] is oldest. */
};
struct Fts5Structure {
  int nRef;                       /* Object reference count */
  u64 nWriteCounter;              /* Total leaves written to level 0 */
  int nSegment;                   /* Total segments in this structure */
  int nLevel;                     /* Number of levels in this index */
  Fts5StructureLevel aLevel[1];   /* Array of nLevel level objects */
};

/*
** An object of type Fts5SegWriter is used to write to segments.
*/
/* State for the leaf page currently being assembled. */
struct Fts5PageWriter {
  int pgno;                       /* Page number for this page */
  int iPrevPgidx;                 /* Previous value written into pgidx */
  Fts5Buffer buf;                 /* Buffer containing leaf data */
  Fts5Buffer pgidx;               /* Buffer containing page-index */
  Fts5Buffer term;                /* Buffer containing previous term on page */
};
/* State for one doclist-index page being assembled. */
struct Fts5DlidxWriter {
  int pgno;                       /* Page number for this page */
  int bPrevValid;                 /* True if iPrev is valid */
  i64 iPrev;                      /* Previous rowid value written to page */
  Fts5Buffer buf;                 /* Buffer containing page data */
};
/* Top-level writer object: one leaf writer plus an array of dlidx writers. */
struct Fts5SegWriter {
  int iSegid;                     /* Segid to write to */
  Fts5PageWriter writer;          /* PageWriter object */
  i64 iPrevRowid;                 /* Previous rowid written to current leaf */
  u8 bFirstRowidInDoclist;        /* True if next rowid is first in doclist */
  u8 bFirstRowidInPage;           /* True if next rowid is first in page */
  /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */
  u8 bFirstTermInPage;            /* True if next term will be first in leaf */
  int nLeafWritten;               /* Number of leaf pages written */
  int nEmpty;                     /* Number of contiguous term-less nodes */

  int nDlidx;                     /* Allocated size of aDlidx[] array */
  Fts5DlidxWriter *aDlidx;        /* Array of Fts5DlidxWriter objects */

  /* Values to insert into the %_idx table */
  Fts5Buffer btterm;              /* Next term to insert into %_idx table */
  int iBtPage;                    /* Page number corresponding to btterm */
};

/* Result of one pairwise comparison in the Fts5Iter.aFirst[] merge array. */
typedef struct Fts5CResult Fts5CResult;
struct Fts5CResult {
  u16 iFirst;                     /* aSeg[] index of firstest iterator */
  u8 bTermEq;                     /* True if the terms are equal */
};

/*
** Object for iterating through a single segment, visiting each term/rowid
** pair in the segment.
**
** pSeg:
**   The segment to iterate through.
**
** iLeafPgno:
**   Current leaf page number within segment.
9134 ** 9135 ** iLeafOffset: 9136 ** Byte offset within the current leaf that is the first byte of the 9137 ** position list data (one byte past the position-list size field). 9138 ** rowid field of the current entry. Usually this is the size field of the 9139 ** position list data. The exception is if the rowid for the current entry 9140 ** is the last thing on the leaf page. 9141 ** 9142 ** pLeaf: 9143 ** Buffer containing current leaf page data. Set to NULL at EOF. 9144 ** 9145 ** iTermLeafPgno, iTermLeafOffset: 9146 ** Leaf page number containing the last term read from the segment. And 9147 ** the offset immediately following the term data. 9148 ** 9149 ** flags: 9150 ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: 9151 ** 9152 ** FTS5_SEGITER_ONETERM: 9153 ** If set, set the iterator to point to EOF after the current doclist 9154 ** has been exhausted. Do not proceed to the next term in the segment. 9155 ** 9156 ** FTS5_SEGITER_REVERSE: 9157 ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If 9158 ** it is set, iterate through rowids in descending order instead of the 9159 ** default ascending order. 9160 ** 9161 ** iRowidOffset/nRowidOffset/aRowidOffset: 9162 ** These are used if the FTS5_SEGITER_REVERSE flag is set. 9163 ** 9164 ** For each rowid on the page corresponding to the current term, the 9165 ** corresponding aRowidOffset[] entry is set to the byte offset of the 9166 ** start of the "position-list-size" field within the page. 9167 ** 9168 ** iTermIdx: 9169 ** Index of current term on iTermLeafPgno.
*/
struct Fts5SegIter {
  Fts5StructureSegment *pSeg;     /* Segment to iterate through */
  int flags;                      /* Mask of configuration flags */
  int iLeafPgno;                  /* Current leaf page number */
  Fts5Data *pLeaf;                /* Current leaf data */
  Fts5Data *pNextLeaf;            /* Leaf page (iLeafPgno+1) */
  i64 iLeafOffset;                /* Byte offset within current leaf */

  /* Next method */
  void (*xNext)(Fts5Index*, Fts5SegIter*, int*);

  /* The page and offset from which the current term was read. The offset
  ** is the offset of the first rowid in the current doclist.  */
  int iTermLeafPgno;
  int iTermLeafOffset;

  int iPgidxOff;                  /* Next offset in pgidx */
  int iEndofDoclist;              /* NOTE(review): presumably the leaf offset
                                  ** at which the current doclist ends; not
                                  ** set anywhere in this section */

  /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */
  int iRowidOffset;               /* Current entry in aRowidOffset[] */
  int nRowidOffset;               /* Allocated size of aRowidOffset[] array */
  int *aRowidOffset;              /* Array of offset to rowid fields */

  Fts5DlidxIter *pDlidx;          /* If there is a doclist-index */

  /* Variables populated based on current entry. */
  Fts5Buffer term;                /* Current term */
  i64 iRowid;                     /* Current rowid */
  int nPos;                       /* Number of bytes in current position list */
  u8 bDel;                        /* True if the delete flag is set */
};

/*
** Argument is a pointer to an Fts5Data structure that contains a
** leaf page.
**
** Asserts that szLeaf is consistent with the page: either it equals the
** full record size (nn) or it equals the big-endian 16-bit value stored
** at byte offset 2 of the page.
*/
#define ASSERT_SZLEAF_OK(x) assert( \
    (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \
)

/* Bit values for Fts5SegIter.flags. */
#define FTS5_SEGITER_ONETERM 0x01
#define FTS5_SEGITER_REVERSE 0x02

/*
** Argument is a pointer to an Fts5Data structure that contains a leaf
** page. This macro evaluates to true if the leaf contains no terms, or
** false if it contains at least one term.
9219 */ 9220 #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) 9221 9222 #define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) 9223 9224 #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) 9225 9226 /* 9227 ** Object for iterating through the merged results of one or more segments, 9228 ** visiting each term/rowid pair in the merged data. 9229 ** 9230 ** nSeg is always a power of two greater than or equal to the number of 9231 ** segments that this object is merging data from. Both the aSeg[] and 9232 ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded 9233 ** with zeroed objects - these are handled as if they were iterators opened 9234 ** on empty segments. 9235 ** 9236 ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an 9237 ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the 9238 ** comparison in this context is the index of the iterator that currently 9239 ** points to the smaller term/rowid combination. Iterators at EOF are 9240 ** considered to be greater than all other iterators. 9241 ** 9242 ** aFirst[1] contains the index in aSeg[] of the iterator that points to 9243 ** the smallest key overall. aFirst[0] is unused. 9244 ** 9245 ** poslist: 9246 ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. 9247 ** There is no way to tell if this is populated or not. 9248 */ 9249 struct Fts5Iter { 9250 Fts5IndexIter base; /* Base class containing output vars */ 9251 9252 Fts5Index *pIndex; /* Index that owns this iterator */ 9253 Fts5Buffer poslist; /* Buffer containing current poslist */ 9254 Fts5Colset *pColset; /* Restrict matches to these columns */ 9255 9256 /* Invoked to set output variables. 
*/ 9257 void (*xSetOutputs)(Fts5Iter*, Fts5SegIter*); 9258 9259 int nSeg; /* Size of aSeg[] array */ 9260 int bRev; /* True to iterate in reverse order */ 9261 u8 bSkipEmpty; /* True to skip deleted entries */ 9262 9263 i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ 9264 Fts5CResult *aFirst; /* Current merge state (see above) */ 9265 Fts5SegIter aSeg[1]; /* Array of segment iterators */ 9266 }; 9267 9268 9269 /* 9270 ** An instance of the following type is used to iterate through the contents 9271 ** of a doclist-index record. 9272 ** 9273 ** pData: 9274 ** Record containing the doclist-index data. 9275 ** 9276 ** bEof: 9277 ** Set to true once iterator has reached EOF. 9278 ** 9279 ** iOff: 9280 ** Set to the current offset within record pData. 9281 */ 9282 struct Fts5DlidxLvl { 9283 Fts5Data *pData; /* Data for current page of this level */ 9284 int iOff; /* Current offset into pData */ 9285 int bEof; /* At EOF already */ 9286 int iFirstOff; /* Used by reverse iterators */ 9287 9288 /* Output variables */ 9289 int iLeafPgno; /* Page number of current leaf page */ 9290 i64 iRowid; /* First rowid on leaf iLeafPgno */ 9291 }; 9292 struct Fts5DlidxIter { 9293 int nLvl; 9294 int iSegid; 9295 Fts5DlidxLvl aLvl[1]; 9296 }; 9297 9298 static void fts5PutU16(u8 *aOut, u16 iVal){ 9299 aOut[0] = (iVal>>8); 9300 aOut[1] = (iVal&0xFF); 9301 } 9302 9303 static u16 fts5GetU16(const u8 *aIn){ 9304 return ((u16)aIn[0] << 8) + aIn[1]; 9305 } 9306 9307 /* 9308 ** Allocate and return a buffer at least nByte bytes in size. 9309 ** 9310 ** If an OOM error is encountered, return NULL and set the error code in 9311 ** the Fts5Index handle passed as the first argument. 9312 */ 9313 static void *fts5IdxMalloc(Fts5Index *p, sqlite3_int64 nByte){ 9314 return sqlite3Fts5MallocZero(&p->rc, nByte); 9315 } 9316 9317 /* 9318 ** Compare the contents of the pLeft buffer with the pRight/nRight blob. 
9319 ** 9320 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or 9321 ** +ve if pRight is smaller than pLeft. In other words: 9322 ** 9323 ** res = *pLeft - *pRight 9324 */ 9325 #ifdef SQLITE_DEBUG 9326 static int fts5BufferCompareBlob( 9327 Fts5Buffer *pLeft, /* Left hand side of comparison */ 9328 const u8 *pRight, int nRight /* Right hand side of comparison */ 9329 ){ 9330 int nCmp = MIN(pLeft->n, nRight); 9331 int res = memcmp(pLeft->p, pRight, nCmp); 9332 return (res==0 ? (pLeft->n - nRight) : res); 9333 } 9334 #endif 9335 9336 /* 9337 ** Compare the contents of the two buffers using memcmp(). If one buffer 9338 ** is a prefix of the other, it is considered the lesser. 9339 ** 9340 ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or 9341 ** +ve if pRight is smaller than pLeft. In other words: 9342 ** 9343 ** res = *pLeft - *pRight 9344 */ 9345 static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ 9346 int nCmp, res; 9347 nCmp = MIN(pLeft->n, pRight->n); 9348 assert( nCmp<=0 || pLeft->p!=0 ); 9349 assert( nCmp<=0 || pRight->p!=0 ); 9350 res = fts5Memcmp(pLeft->p, pRight->p, nCmp); 9351 return (res==0 ? (pLeft->n - pRight->n) : res); 9352 } 9353 9354 static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ 9355 int ret; 9356 fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); 9357 return ret; 9358 } 9359 9360 /* 9361 ** Close the read-only blob handle, if it is open. 9362 */ 9363 static void sqlite3Fts5IndexCloseReader(Fts5Index *p){ 9364 if( p->pReader ){ 9365 sqlite3_blob *pReader = p->pReader; 9366 p->pReader = 0; 9367 sqlite3_blob_close(pReader); 9368 } 9369 } 9370 9371 /* 9372 ** Retrieve a record from the %_data table. 9373 ** 9374 ** If an error occurs, NULL is returned and an error left in the 9375 ** Fts5Index object. 
9376 */ 9377 static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ 9378 Fts5Data *pRet = 0; 9379 if( p->rc==SQLITE_OK ){ 9380 int rc = SQLITE_OK; 9381 9382 if( p->pReader ){ 9383 /* This call may return SQLITE_ABORT if there has been a savepoint 9384 ** rollback since it was last used. In this case a new blob handle 9385 ** is required. */ 9386 sqlite3_blob *pBlob = p->pReader; 9387 p->pReader = 0; 9388 rc = sqlite3_blob_reopen(pBlob, iRowid); 9389 assert( p->pReader==0 ); 9390 p->pReader = pBlob; 9391 if( rc!=SQLITE_OK ){ 9392 sqlite3Fts5IndexCloseReader(p); 9393 } 9394 if( rc==SQLITE_ABORT ) rc = SQLITE_OK; 9395 } 9396 9397 /* If the blob handle is not open at this point, open it and seek 9398 ** to the requested entry. */ 9399 if( p->pReader==0 && rc==SQLITE_OK ){ 9400 Fts5Config *pConfig = p->pConfig; 9401 rc = sqlite3_blob_open(pConfig->db, 9402 pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader 9403 ); 9404 } 9405 9406 /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls 9407 ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. 9408 ** All the reasons those functions might return SQLITE_ERROR - missing 9409 ** table, missing row, non-blob/text in block column - indicate 9410 ** backing store corruption. 
*/ 9411 if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; 9412 9413 if( rc==SQLITE_OK ){ 9414 u8 *aOut = 0; /* Read blob data into this buffer */ 9415 int nByte = sqlite3_blob_bytes(p->pReader); 9416 sqlite3_int64 nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; 9417 pRet = (Fts5Data*)sqlite3_malloc64(nAlloc); 9418 if( pRet ){ 9419 pRet->nn = nByte; 9420 aOut = pRet->p = (u8*)&pRet[1]; 9421 }else{ 9422 rc = SQLITE_NOMEM; 9423 } 9424 9425 if( rc==SQLITE_OK ){ 9426 rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); 9427 } 9428 if( rc!=SQLITE_OK ){ 9429 sqlite3_free(pRet); 9430 pRet = 0; 9431 }else{ 9432 /* TODO1: Fix this */ 9433 pRet->p[nByte] = 0x00; 9434 pRet->p[nByte+1] = 0x00; 9435 pRet->szLeaf = fts5GetU16(&pRet->p[2]); 9436 } 9437 } 9438 p->rc = rc; 9439 p->nRead++; 9440 } 9441 9442 assert( (pRet==0)==(p->rc!=SQLITE_OK) ); 9443 return pRet; 9444 } 9445 9446 9447 /* 9448 ** Release a reference to data record returned by an earlier call to 9449 ** fts5DataRead(). 9450 */ 9451 static void fts5DataRelease(Fts5Data *pData){ 9452 sqlite3_free(pData); 9453 } 9454 9455 static Fts5Data *fts5LeafRead(Fts5Index *p, i64 iRowid){ 9456 Fts5Data *pRet = fts5DataRead(p, iRowid); 9457 if( pRet ){ 9458 if( pRet->nn<4 || pRet->szLeaf>pRet->nn ){ 9459 p->rc = FTS5_CORRUPT; 9460 fts5DataRelease(pRet); 9461 pRet = 0; 9462 } 9463 } 9464 return pRet; 9465 } 9466 9467 static int fts5IndexPrepareStmt( 9468 Fts5Index *p, 9469 sqlite3_stmt **ppStmt, 9470 char *zSql 9471 ){ 9472 if( p->rc==SQLITE_OK ){ 9473 if( zSql ){ 9474 p->rc = sqlite3_prepare_v3(p->pConfig->db, zSql, -1, 9475 SQLITE_PREPARE_PERSISTENT|SQLITE_PREPARE_NO_VTAB, 9476 ppStmt, 0); 9477 }else{ 9478 p->rc = SQLITE_NOMEM; 9479 } 9480 } 9481 sqlite3_free(zSql); 9482 return p->rc; 9483 } 9484 9485 9486 /* 9487 ** INSERT OR REPLACE a record into the %_data table. 
9488 */ 9489 static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ 9490 if( p->rc!=SQLITE_OK ) return; 9491 9492 if( p->pWriter==0 ){ 9493 Fts5Config *pConfig = p->pConfig; 9494 fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf( 9495 "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", 9496 pConfig->zDb, pConfig->zName 9497 )); 9498 if( p->rc ) return; 9499 } 9500 9501 sqlite3_bind_int64(p->pWriter, 1, iRowid); 9502 sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); 9503 sqlite3_step(p->pWriter); 9504 p->rc = sqlite3_reset(p->pWriter); 9505 sqlite3_bind_null(p->pWriter, 2); 9506 } 9507 9508 /* 9509 ** Execute the following SQL: 9510 ** 9511 ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast 9512 */ 9513 static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ 9514 if( p->rc!=SQLITE_OK ) return; 9515 9516 if( p->pDeleter==0 ){ 9517 Fts5Config *pConfig = p->pConfig; 9518 char *zSql = sqlite3_mprintf( 9519 "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?", 9520 pConfig->zDb, pConfig->zName 9521 ); 9522 if( fts5IndexPrepareStmt(p, &p->pDeleter, zSql) ) return; 9523 } 9524 9525 sqlite3_bind_int64(p->pDeleter, 1, iFirst); 9526 sqlite3_bind_int64(p->pDeleter, 2, iLast); 9527 sqlite3_step(p->pDeleter); 9528 p->rc = sqlite3_reset(p->pDeleter); 9529 } 9530 9531 /* 9532 ** Remove all records associated with segment iSegid. 
9533 */ 9534 static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ 9535 i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0); 9536 i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0)-1; 9537 fts5DataDelete(p, iFirst, iLast); 9538 if( p->pIdxDeleter==0 ){ 9539 Fts5Config *pConfig = p->pConfig; 9540 fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( 9541 "DELETE FROM '%q'.'%q_idx' WHERE segid=?", 9542 pConfig->zDb, pConfig->zName 9543 )); 9544 } 9545 if( p->rc==SQLITE_OK ){ 9546 sqlite3_bind_int(p->pIdxDeleter, 1, iSegid); 9547 sqlite3_step(p->pIdxDeleter); 9548 p->rc = sqlite3_reset(p->pIdxDeleter); 9549 } 9550 } 9551 9552 /* 9553 ** Release a reference to an Fts5Structure object returned by an earlier 9554 ** call to fts5StructureRead() or fts5StructureDecode(). 9555 */ 9556 static void fts5StructureRelease(Fts5Structure *pStruct){ 9557 if( pStruct && 0>=(--pStruct->nRef) ){ 9558 int i; 9559 assert( pStruct->nRef==0 ); 9560 for(i=0; i<pStruct->nLevel; i++){ 9561 sqlite3_free(pStruct->aLevel[i].aSeg); 9562 } 9563 sqlite3_free(pStruct); 9564 } 9565 } 9566 9567 static void fts5StructureRef(Fts5Structure *pStruct){ 9568 pStruct->nRef++; 9569 } 9570 9571 static void *sqlite3Fts5StructureRef(Fts5Index *p){ 9572 fts5StructureRef(p->pStruct); 9573 return (void*)p->pStruct; 9574 } 9575 static void sqlite3Fts5StructureRelease(void *p){ 9576 if( p ){ 9577 fts5StructureRelease((Fts5Structure*)p); 9578 } 9579 } 9580 static int sqlite3Fts5StructureTest(Fts5Index *p, void *pStruct){ 9581 if( p->pStruct!=(Fts5Structure*)pStruct ){ 9582 return SQLITE_ABORT; 9583 } 9584 return SQLITE_OK; 9585 } 9586 9587 /* 9588 ** Ensure that structure object (*pp) is writable. 9589 ** 9590 ** This function is a no-op if (*pRc) is not SQLITE_OK when it is called. If 9591 ** an error occurs, (*pRc) is set to an SQLite error code before returning. 
9592 */ 9593 static void fts5StructureMakeWritable(int *pRc, Fts5Structure **pp){ 9594 Fts5Structure *p = *pp; 9595 if( *pRc==SQLITE_OK && p->nRef>1 ){ 9596 i64 nByte = sizeof(Fts5Structure)+(p->nLevel-1)*sizeof(Fts5StructureLevel); 9597 Fts5Structure *pNew; 9598 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(pRc, nByte); 9599 if( pNew ){ 9600 int i; 9601 memcpy(pNew, p, nByte); 9602 for(i=0; i<p->nLevel; i++) pNew->aLevel[i].aSeg = 0; 9603 for(i=0; i<p->nLevel; i++){ 9604 Fts5StructureLevel *pLvl = &pNew->aLevel[i]; 9605 nByte = sizeof(Fts5StructureSegment) * pNew->aLevel[i].nSeg; 9606 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(pRc, nByte); 9607 if( pLvl->aSeg==0 ){ 9608 for(i=0; i<p->nLevel; i++){ 9609 sqlite3_free(pNew->aLevel[i].aSeg); 9610 } 9611 sqlite3_free(pNew); 9612 return; 9613 } 9614 memcpy(pLvl->aSeg, p->aLevel[i].aSeg, nByte); 9615 } 9616 p->nRef--; 9617 pNew->nRef = 1; 9618 } 9619 *pp = pNew; 9620 } 9621 } 9622 9623 /* 9624 ** Deserialize and return the structure record currently stored in serialized 9625 ** form within buffer pData/nData. 9626 ** 9627 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array 9628 ** are over-allocated by one slot. This allows the structure contents 9629 ** to be more easily edited. 9630 ** 9631 ** If an error occurs, *ppOut is set to NULL and an SQLite error code 9632 ** returned. Otherwise, *ppOut is set to point to the new object and 9633 ** SQLITE_OK returned. 
9634 */ 9635 static int fts5StructureDecode( 9636 const u8 *pData, /* Buffer containing serialized structure */ 9637 int nData, /* Size of buffer pData in bytes */ 9638 int *piCookie, /* Configuration cookie value */ 9639 Fts5Structure **ppOut /* OUT: Deserialized object */ 9640 ){ 9641 int rc = SQLITE_OK; 9642 int i = 0; 9643 int iLvl; 9644 int nLevel = 0; 9645 int nSegment = 0; 9646 sqlite3_int64 nByte; /* Bytes of space to allocate at pRet */ 9647 Fts5Structure *pRet = 0; /* Structure object to return */ 9648 9649 /* Grab the cookie value */ 9650 if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); 9651 i = 4; 9652 9653 /* Read the total number of levels and segments from the start of the 9654 ** structure record. */ 9655 i += fts5GetVarint32(&pData[i], nLevel); 9656 i += fts5GetVarint32(&pData[i], nSegment); 9657 if( nLevel>FTS5_MAX_SEGMENT || nLevel<0 9658 || nSegment>FTS5_MAX_SEGMENT || nSegment<0 9659 ){ 9660 return FTS5_CORRUPT; 9661 } 9662 nByte = ( 9663 sizeof(Fts5Structure) + /* Main structure */ 9664 sizeof(Fts5StructureLevel) * (nLevel-1) /* aLevel[] array */ 9665 ); 9666 pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); 9667 9668 if( pRet ){ 9669 pRet->nRef = 1; 9670 pRet->nLevel = nLevel; 9671 pRet->nSegment = nSegment; 9672 i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); 9673 9674 for(iLvl=0; rc==SQLITE_OK && iLvl<nLevel; iLvl++){ 9675 Fts5StructureLevel *pLvl = &pRet->aLevel[iLvl]; 9676 int nTotal = 0; 9677 int iSeg; 9678 9679 if( i>=nData ){ 9680 rc = FTS5_CORRUPT; 9681 }else{ 9682 i += fts5GetVarint32(&pData[i], pLvl->nMerge); 9683 i += fts5GetVarint32(&pData[i], nTotal); 9684 if( nTotal<pLvl->nMerge ) rc = FTS5_CORRUPT; 9685 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, 9686 nTotal * sizeof(Fts5StructureSegment) 9687 ); 9688 nSegment -= nTotal; 9689 } 9690 9691 if( rc==SQLITE_OK ){ 9692 pLvl->nSeg = nTotal; 9693 for(iSeg=0; iSeg<nTotal; iSeg++){ 9694 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; 9695 if( 
i>=nData ){ 9696 rc = FTS5_CORRUPT; 9697 break; 9698 } 9699 i += fts5GetVarint32(&pData[i], pSeg->iSegid); 9700 i += fts5GetVarint32(&pData[i], pSeg->pgnoFirst); 9701 i += fts5GetVarint32(&pData[i], pSeg->pgnoLast); 9702 if( pSeg->pgnoLast<pSeg->pgnoFirst ){ 9703 rc = FTS5_CORRUPT; 9704 break; 9705 } 9706 } 9707 if( iLvl>0 && pLvl[-1].nMerge && nTotal==0 ) rc = FTS5_CORRUPT; 9708 if( iLvl==nLevel-1 && pLvl->nMerge ) rc = FTS5_CORRUPT; 9709 } 9710 } 9711 if( nSegment!=0 && rc==SQLITE_OK ) rc = FTS5_CORRUPT; 9712 9713 if( rc!=SQLITE_OK ){ 9714 fts5StructureRelease(pRet); 9715 pRet = 0; 9716 } 9717 } 9718 9719 *ppOut = pRet; 9720 return rc; 9721 } 9722 9723 /* 9724 ** Add a level to the Fts5Structure.aLevel[] array of structure object 9725 ** (*ppStruct). 9726 */ 9727 static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ 9728 fts5StructureMakeWritable(pRc, ppStruct); 9729 if( *pRc==SQLITE_OK ){ 9730 Fts5Structure *pStruct = *ppStruct; 9731 int nLevel = pStruct->nLevel; 9732 sqlite3_int64 nByte = ( 9733 sizeof(Fts5Structure) + /* Main structure */ 9734 sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ 9735 ); 9736 9737 pStruct = sqlite3_realloc64(pStruct, nByte); 9738 if( pStruct ){ 9739 memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); 9740 pStruct->nLevel++; 9741 *ppStruct = pStruct; 9742 }else{ 9743 *pRc = SQLITE_NOMEM; 9744 } 9745 } 9746 } 9747 9748 /* 9749 ** Extend level iLvl so that there is room for at least nExtra more 9750 ** segments. 
9751 */ 9752 static void fts5StructureExtendLevel( 9753 int *pRc, 9754 Fts5Structure *pStruct, 9755 int iLvl, 9756 int nExtra, 9757 int bInsert 9758 ){ 9759 if( *pRc==SQLITE_OK ){ 9760 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 9761 Fts5StructureSegment *aNew; 9762 sqlite3_int64 nByte; 9763 9764 nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); 9765 aNew = sqlite3_realloc64(pLvl->aSeg, nByte); 9766 if( aNew ){ 9767 if( bInsert==0 ){ 9768 memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); 9769 }else{ 9770 int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); 9771 memmove(&aNew[nExtra], aNew, nMove); 9772 memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); 9773 } 9774 pLvl->aSeg = aNew; 9775 }else{ 9776 *pRc = SQLITE_NOMEM; 9777 } 9778 } 9779 } 9780 9781 static Fts5Structure *fts5StructureReadUncached(Fts5Index *p){ 9782 Fts5Structure *pRet = 0; 9783 Fts5Config *pConfig = p->pConfig; 9784 int iCookie; /* Configuration cookie */ 9785 Fts5Data *pData; 9786 9787 pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); 9788 if( p->rc==SQLITE_OK ){ 9789 /* TODO: Do we need this if the leaf-index is appended? Probably... 
*/ 9790 memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); 9791 p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); 9792 if( p->rc==SQLITE_OK && (pConfig->pgsz==0 || pConfig->iCookie!=iCookie) ){ 9793 p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); 9794 } 9795 fts5DataRelease(pData); 9796 if( p->rc!=SQLITE_OK ){ 9797 fts5StructureRelease(pRet); 9798 pRet = 0; 9799 } 9800 } 9801 9802 return pRet; 9803 } 9804 9805 static i64 fts5IndexDataVersion(Fts5Index *p){ 9806 i64 iVersion = 0; 9807 9808 if( p->rc==SQLITE_OK ){ 9809 if( p->pDataVersion==0 ){ 9810 p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, 9811 sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) 9812 ); 9813 if( p->rc ) return 0; 9814 } 9815 9816 if( SQLITE_ROW==sqlite3_step(p->pDataVersion) ){ 9817 iVersion = sqlite3_column_int64(p->pDataVersion, 0); 9818 } 9819 p->rc = sqlite3_reset(p->pDataVersion); 9820 } 9821 9822 return iVersion; 9823 } 9824 9825 /* 9826 ** Read, deserialize and return the structure record. 9827 ** 9828 ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array 9829 ** are over-allocated as described for function fts5StructureDecode() 9830 ** above. 9831 ** 9832 ** If an error occurs, NULL is returned and an error code left in the 9833 ** Fts5Index handle. If an error has already occurred when this function 9834 ** is called, it is a no-op. 
9835 */ 9836 static Fts5Structure *fts5StructureRead(Fts5Index *p){ 9837 9838 if( p->pStruct==0 ){ 9839 p->iStructVersion = fts5IndexDataVersion(p); 9840 if( p->rc==SQLITE_OK ){ 9841 p->pStruct = fts5StructureReadUncached(p); 9842 } 9843 } 9844 9845 #if 0 9846 else{ 9847 Fts5Structure *pTest = fts5StructureReadUncached(p); 9848 if( pTest ){ 9849 int i, j; 9850 assert_nc( p->pStruct->nSegment==pTest->nSegment ); 9851 assert_nc( p->pStruct->nLevel==pTest->nLevel ); 9852 for(i=0; i<pTest->nLevel; i++){ 9853 assert_nc( p->pStruct->aLevel[i].nMerge==pTest->aLevel[i].nMerge ); 9854 assert_nc( p->pStruct->aLevel[i].nSeg==pTest->aLevel[i].nSeg ); 9855 for(j=0; j<pTest->aLevel[i].nSeg; j++){ 9856 Fts5StructureSegment *p1 = &pTest->aLevel[i].aSeg[j]; 9857 Fts5StructureSegment *p2 = &p->pStruct->aLevel[i].aSeg[j]; 9858 assert_nc( p1->iSegid==p2->iSegid ); 9859 assert_nc( p1->pgnoFirst==p2->pgnoFirst ); 9860 assert_nc( p1->pgnoLast==p2->pgnoLast ); 9861 } 9862 } 9863 fts5StructureRelease(pTest); 9864 } 9865 } 9866 #endif 9867 9868 if( p->rc!=SQLITE_OK ) return 0; 9869 assert( p->iStructVersion!=0 ); 9870 assert( p->pStruct!=0 ); 9871 fts5StructureRef(p->pStruct); 9872 return p->pStruct; 9873 } 9874 9875 static void fts5StructureInvalidate(Fts5Index *p){ 9876 if( p->pStruct ){ 9877 fts5StructureRelease(p->pStruct); 9878 p->pStruct = 0; 9879 } 9880 } 9881 9882 /* 9883 ** Return the total number of segments in index structure pStruct. This 9884 ** function is only ever used as part of assert() conditions. 
9885 */ 9886 #ifdef SQLITE_DEBUG 9887 static int fts5StructureCountSegments(Fts5Structure *pStruct){ 9888 int nSegment = 0; /* Total number of segments */ 9889 if( pStruct ){ 9890 int iLvl; /* Used to iterate through levels */ 9891 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 9892 nSegment += pStruct->aLevel[iLvl].nSeg; 9893 } 9894 } 9895 9896 return nSegment; 9897 } 9898 #endif 9899 9900 #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ 9901 assert( (pBuf)->nSpace>=((pBuf)->n+nBlob) ); \ 9902 memcpy(&(pBuf)->p[(pBuf)->n], pBlob, nBlob); \ 9903 (pBuf)->n += nBlob; \ 9904 } 9905 9906 #define fts5BufferSafeAppendVarint(pBuf, iVal) { \ 9907 (pBuf)->n += sqlite3Fts5PutVarint(&(pBuf)->p[(pBuf)->n], (iVal)); \ 9908 assert( (pBuf)->nSpace>=(pBuf)->n ); \ 9909 } 9910 9911 9912 /* 9913 ** Serialize and store the "structure" record. 9914 ** 9915 ** If an error occurs, leave an error code in the Fts5Index object. If an 9916 ** error has already occurred, this function is a no-op. 9917 */ 9918 static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ 9919 if( p->rc==SQLITE_OK ){ 9920 Fts5Buffer buf; /* Buffer to serialize record into */ 9921 int iLvl; /* Used to iterate through levels */ 9922 int iCookie; /* Cookie value to store */ 9923 9924 assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); 9925 memset(&buf, 0, sizeof(Fts5Buffer)); 9926 9927 /* Append the current configuration cookie */ 9928 iCookie = p->pConfig->iCookie; 9929 if( iCookie<0 ) iCookie = 0; 9930 9931 if( 0==sqlite3Fts5BufferSize(&p->rc, &buf, 4+9+9+9) ){ 9932 sqlite3Fts5Put32(buf.p, iCookie); 9933 buf.n = 4; 9934 fts5BufferSafeAppendVarint(&buf, pStruct->nLevel); 9935 fts5BufferSafeAppendVarint(&buf, pStruct->nSegment); 9936 fts5BufferSafeAppendVarint(&buf, (i64)pStruct->nWriteCounter); 9937 } 9938 9939 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 9940 int iSeg; /* Used to iterate through segments */ 9941 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 9942 
fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); 9943 fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); 9944 assert( pLvl->nMerge<=pLvl->nSeg ); 9945 9946 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ 9947 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); 9948 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); 9949 fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); 9950 } 9951 } 9952 9953 fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); 9954 fts5BufferFree(&buf); 9955 } 9956 } 9957 9958 #if 0 9959 static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*); 9960 static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ 9961 int rc = SQLITE_OK; 9962 Fts5Buffer buf; 9963 memset(&buf, 0, sizeof(buf)); 9964 fts5DebugStructure(&rc, &buf, pStruct); 9965 fprintf(stdout, "%s: %s\n", zCaption, buf.p); 9966 fflush(stdout); 9967 fts5BufferFree(&buf); 9968 } 9969 #else 9970 # define fts5PrintStructure(x,y) 9971 #endif 9972 9973 static int fts5SegmentSize(Fts5StructureSegment *pSeg){ 9974 return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; 9975 } 9976 9977 /* 9978 ** Return a copy of index structure pStruct. Except, promote as many 9979 ** segments as possible to level iPromote. If an OOM occurs, NULL is 9980 ** returned. 
9981 */ 9982 static void fts5StructurePromoteTo( 9983 Fts5Index *p, 9984 int iPromote, 9985 int szPromote, 9986 Fts5Structure *pStruct 9987 ){ 9988 int il, is; 9989 Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; 9990 9991 if( pOut->nMerge==0 ){ 9992 for(il=iPromote+1; il<pStruct->nLevel; il++){ 9993 Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; 9994 if( pLvl->nMerge ) return; 9995 for(is=pLvl->nSeg-1; is>=0; is--){ 9996 int sz = fts5SegmentSize(&pLvl->aSeg[is]); 9997 if( sz>szPromote ) return; 9998 fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); 9999 if( p->rc ) return; 10000 memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); 10001 pOut->nSeg++; 10002 pLvl->nSeg--; 10003 } 10004 } 10005 } 10006 } 10007 10008 /* 10009 ** A new segment has just been written to level iLvl of index structure 10010 ** pStruct. This function determines if any segments should be promoted 10011 ** as a result. Segments are promoted in two scenarios: 10012 ** 10013 ** a) If the segment just written is smaller than one or more segments 10014 ** within the previous populated level, it is promoted to the previous 10015 ** populated level. 10016 ** 10017 ** b) If the segment just written is larger than the newest segment on 10018 ** the next populated level, then that segment, and any other adjacent 10019 ** segments that are also smaller than the one just written, are 10020 ** promoted. 10021 ** 10022 ** If one or more segments are promoted, the structure object is updated 10023 ** to reflect this. 
10024 */ 10025 static void fts5StructurePromote( 10026 Fts5Index *p, /* FTS5 backend object */ 10027 int iLvl, /* Index level just updated */ 10028 Fts5Structure *pStruct /* Index structure */ 10029 ){ 10030 if( p->rc==SQLITE_OK ){ 10031 int iTst; 10032 int iPromote = -1; 10033 int szPromote = 0; /* Promote anything this size or smaller */ 10034 Fts5StructureSegment *pSeg; /* Segment just written */ 10035 int szSeg; /* Size of segment just written */ 10036 int nSeg = pStruct->aLevel[iLvl].nSeg; 10037 10038 if( nSeg==0 ) return; 10039 pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; 10040 szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); 10041 10042 /* Check for condition (a) */ 10043 for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); 10044 if( iTst>=0 ){ 10045 int i; 10046 int szMax = 0; 10047 Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; 10048 assert( pTst->nMerge==0 ); 10049 for(i=0; i<pTst->nSeg; i++){ 10050 int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; 10051 if( sz>szMax ) szMax = sz; 10052 } 10053 if( szMax>=szSeg ){ 10054 /* Condition (a) is true. Promote the newest segment on level 10055 ** iLvl to level iTst. */ 10056 iPromote = iTst; 10057 szPromote = szMax; 10058 } 10059 } 10060 10061 /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() 10062 ** is a no-op if it is not. */ 10063 if( iPromote<0 ){ 10064 iPromote = iLvl; 10065 szPromote = szSeg; 10066 } 10067 fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); 10068 } 10069 } 10070 10071 10072 /* 10073 ** Advance the iterator passed as the only argument. If the end of the 10074 ** doclist-index page is reached, return non-zero. 
10075 */ 10076 static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ 10077 Fts5Data *pData = pLvl->pData; 10078 10079 if( pLvl->iOff==0 ){ 10080 assert( pLvl->bEof==0 ); 10081 pLvl->iOff = 1; 10082 pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); 10083 pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); 10084 pLvl->iFirstOff = pLvl->iOff; 10085 }else{ 10086 int iOff; 10087 for(iOff=pLvl->iOff; iOff<pData->nn; iOff++){ 10088 if( pData->p[iOff] ) break; 10089 } 10090 10091 if( iOff<pData->nn ){ 10092 i64 iVal; 10093 pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; 10094 iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); 10095 pLvl->iRowid += iVal; 10096 pLvl->iOff = iOff; 10097 }else{ 10098 pLvl->bEof = 1; 10099 } 10100 } 10101 10102 return pLvl->bEof; 10103 } 10104 10105 /* 10106 ** Advance the iterator passed as the only argument. 10107 */ 10108 static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ 10109 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; 10110 10111 assert( iLvl<pIter->nLvl ); 10112 if( fts5DlidxLvlNext(pLvl) ){ 10113 if( (iLvl+1) < pIter->nLvl ){ 10114 fts5DlidxIterNextR(p, pIter, iLvl+1); 10115 if( pLvl[1].bEof==0 ){ 10116 fts5DataRelease(pLvl->pData); 10117 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 10118 pLvl->pData = fts5DataRead(p, 10119 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) 10120 ); 10121 if( pLvl->pData ) fts5DlidxLvlNext(pLvl); 10122 } 10123 } 10124 } 10125 10126 return pIter->aLvl[0].bEof; 10127 } 10128 static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ 10129 return fts5DlidxIterNextR(p, pIter, 0); 10130 } 10131 10132 /* 10133 ** The iterator passed as the first argument has the following fields set 10134 ** as follows. This function sets up the rest of the iterator so that it 10135 ** points to the first rowid in the doclist-index. 
10136 ** 10137 ** pData: 10138 ** pointer to doclist-index record, 10139 ** 10140 ** When this function is called pIter->iLeafPgno is the page number the 10141 ** doclist is associated with (the one featuring the term). 10142 */ 10143 static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ 10144 int i; 10145 for(i=0; i<pIter->nLvl; i++){ 10146 fts5DlidxLvlNext(&pIter->aLvl[i]); 10147 } 10148 return pIter->aLvl[0].bEof; 10149 } 10150 10151 10152 static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ 10153 return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; 10154 } 10155 10156 static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ 10157 int i; 10158 10159 /* Advance each level to the last entry on the last page */ 10160 for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ 10161 Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; 10162 while( fts5DlidxLvlNext(pLvl)==0 ); 10163 pLvl->bEof = 0; 10164 10165 if( i>0 ){ 10166 Fts5DlidxLvl *pChild = &pLvl[-1]; 10167 fts5DataRelease(pChild->pData); 10168 memset(pChild, 0, sizeof(Fts5DlidxLvl)); 10169 pChild->pData = fts5DataRead(p, 10170 FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) 10171 ); 10172 } 10173 } 10174 } 10175 10176 /* 10177 ** Move the iterator passed as the only argument to the previous entry. 10178 */ 10179 static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ 10180 int iOff = pLvl->iOff; 10181 10182 assert( pLvl->bEof==0 ); 10183 if( iOff<=pLvl->iFirstOff ){ 10184 pLvl->bEof = 1; 10185 }else{ 10186 u8 *a = pLvl->pData->p; 10187 i64 iVal; 10188 int iLimit; 10189 int ii; 10190 int nZero = 0; 10191 10192 /* Currently iOff points to the first byte of a varint. This block 10193 ** decrements iOff until it points to the first byte of the previous 10194 ** varint. Taking care not to read any memory locations that occur 10195 ** before the buffer in memory. */ 10196 iLimit = (iOff>9 ? 
iOff-9 : 0); 10197 for(iOff--; iOff>iLimit; iOff--){ 10198 if( (a[iOff-1] & 0x80)==0 ) break; 10199 } 10200 10201 fts5GetVarint(&a[iOff], (u64*)&iVal); 10202 pLvl->iRowid -= iVal; 10203 pLvl->iLeafPgno--; 10204 10205 /* Skip backwards past any 0x00 varints. */ 10206 for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ 10207 nZero++; 10208 } 10209 if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ 10210 /* The byte immediately before the last 0x00 byte has the 0x80 bit 10211 ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 10212 ** bytes before a[ii]. */ 10213 int bZero = 0; /* True if last 0x00 counts */ 10214 if( (ii-8)>=pLvl->iFirstOff ){ 10215 int j; 10216 for(j=1; j<=8 && (a[ii-j] & 0x80); j++); 10217 bZero = (j>8); 10218 } 10219 if( bZero==0 ) nZero--; 10220 } 10221 pLvl->iLeafPgno -= nZero; 10222 pLvl->iOff = iOff - nZero; 10223 } 10224 10225 return pLvl->bEof; 10226 } 10227 10228 static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ 10229 Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; 10230 10231 assert( iLvl<pIter->nLvl ); 10232 if( fts5DlidxLvlPrev(pLvl) ){ 10233 if( (iLvl+1) < pIter->nLvl ){ 10234 fts5DlidxIterPrevR(p, pIter, iLvl+1); 10235 if( pLvl[1].bEof==0 ){ 10236 fts5DataRelease(pLvl->pData); 10237 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 10238 pLvl->pData = fts5DataRead(p, 10239 FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) 10240 ); 10241 if( pLvl->pData ){ 10242 while( fts5DlidxLvlNext(pLvl)==0 ); 10243 pLvl->bEof = 0; 10244 } 10245 } 10246 } 10247 } 10248 10249 return pIter->aLvl[0].bEof; 10250 } 10251 static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ 10252 return fts5DlidxIterPrevR(p, pIter, 0); 10253 } 10254 10255 /* 10256 ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). 
10257 */ 10258 static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ 10259 if( pIter ){ 10260 int i; 10261 for(i=0; i<pIter->nLvl; i++){ 10262 fts5DataRelease(pIter->aLvl[i].pData); 10263 } 10264 sqlite3_free(pIter); 10265 } 10266 } 10267 10268 static Fts5DlidxIter *fts5DlidxIterInit( 10269 Fts5Index *p, /* Fts5 Backend to iterate within */ 10270 int bRev, /* True for ORDER BY ASC */ 10271 int iSegid, /* Segment id */ 10272 int iLeafPg /* Leaf page number to load dlidx for */ 10273 ){ 10274 Fts5DlidxIter *pIter = 0; 10275 int i; 10276 int bDone = 0; 10277 10278 for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ 10279 sqlite3_int64 nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); 10280 Fts5DlidxIter *pNew; 10281 10282 pNew = (Fts5DlidxIter*)sqlite3_realloc64(pIter, nByte); 10283 if( pNew==0 ){ 10284 p->rc = SQLITE_NOMEM; 10285 }else{ 10286 i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); 10287 Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; 10288 pIter = pNew; 10289 memset(pLvl, 0, sizeof(Fts5DlidxLvl)); 10290 pLvl->pData = fts5DataRead(p, iRowid); 10291 if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ 10292 bDone = 1; 10293 } 10294 pIter->nLvl = i+1; 10295 } 10296 } 10297 10298 if( p->rc==SQLITE_OK ){ 10299 pIter->iSegid = iSegid; 10300 if( bRev==0 ){ 10301 fts5DlidxIterFirst(pIter); 10302 }else{ 10303 fts5DlidxIterLast(p, pIter); 10304 } 10305 } 10306 10307 if( p->rc!=SQLITE_OK ){ 10308 fts5DlidxIterFree(pIter); 10309 pIter = 0; 10310 } 10311 10312 return pIter; 10313 } 10314 10315 static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ 10316 return pIter->aLvl[0].iRowid; 10317 } 10318 static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ 10319 return pIter->aLvl[0].iLeafPgno; 10320 } 10321 10322 /* 10323 ** Load the next leaf page into the segment iterator. 
10324 */ 10325 static void fts5SegIterNextPage( 10326 Fts5Index *p, /* FTS5 backend object */ 10327 Fts5SegIter *pIter /* Iterator to advance to next page */ 10328 ){ 10329 Fts5Data *pLeaf; 10330 Fts5StructureSegment *pSeg = pIter->pSeg; 10331 fts5DataRelease(pIter->pLeaf); 10332 pIter->iLeafPgno++; 10333 if( pIter->pNextLeaf ){ 10334 pIter->pLeaf = pIter->pNextLeaf; 10335 pIter->pNextLeaf = 0; 10336 }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ 10337 pIter->pLeaf = fts5LeafRead(p, 10338 FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) 10339 ); 10340 }else{ 10341 pIter->pLeaf = 0; 10342 } 10343 pLeaf = pIter->pLeaf; 10344 10345 if( pLeaf ){ 10346 pIter->iPgidxOff = pLeaf->szLeaf; 10347 if( fts5LeafIsTermless(pLeaf) ){ 10348 pIter->iEndofDoclist = pLeaf->nn+1; 10349 }else{ 10350 pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], 10351 pIter->iEndofDoclist 10352 ); 10353 } 10354 } 10355 } 10356 10357 /* 10358 ** Argument p points to a buffer containing a varint to be interpreted as a 10359 ** position list size field. Read the varint and return the number of bytes 10360 ** read. Before returning, set *pnSz to the number of bytes in the position 10361 ** list, and *pbDel to true if the delete flag is set, or false otherwise. 10362 */ 10363 static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ 10364 int nSz; 10365 int n = 0; 10366 fts5FastGetVarint32(p, n, nSz); 10367 assert_nc( nSz>=0 ); 10368 *pnSz = nSz/2; 10369 *pbDel = nSz & 0x0001; 10370 return n; 10371 } 10372 10373 /* 10374 ** Fts5SegIter.iLeafOffset currently points to the first byte of a 10375 ** position-list size field. Read the value of the field and store it 10376 ** in the following variables: 10377 ** 10378 ** Fts5SegIter.nPos 10379 ** Fts5SegIter.bDel 10380 ** 10381 ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the 10382 ** position list content (if any). 
10383 */ 10384 static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ 10385 if( p->rc==SQLITE_OK ){ 10386 int iOff = pIter->iLeafOffset; /* Offset to read at */ 10387 ASSERT_SZLEAF_OK(pIter->pLeaf); 10388 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ 10389 int iEod = MIN(pIter->iEndofDoclist, pIter->pLeaf->szLeaf); 10390 pIter->bDel = 0; 10391 pIter->nPos = 1; 10392 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ 10393 pIter->bDel = 1; 10394 iOff++; 10395 if( iOff<iEod && pIter->pLeaf->p[iOff]==0 ){ 10396 pIter->nPos = 1; 10397 iOff++; 10398 }else{ 10399 pIter->nPos = 0; 10400 } 10401 } 10402 }else{ 10403 int nSz; 10404 fts5FastGetVarint32(pIter->pLeaf->p, iOff, nSz); 10405 pIter->bDel = (nSz & 0x0001); 10406 pIter->nPos = nSz>>1; 10407 assert_nc( pIter->nPos>=0 ); 10408 } 10409 pIter->iLeafOffset = iOff; 10410 } 10411 } 10412 10413 static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ 10414 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ 10415 i64 iOff = pIter->iLeafOffset; 10416 10417 ASSERT_SZLEAF_OK(pIter->pLeaf); 10418 if( iOff>=pIter->pLeaf->szLeaf ){ 10419 fts5SegIterNextPage(p, pIter); 10420 if( pIter->pLeaf==0 ){ 10421 if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; 10422 return; 10423 } 10424 iOff = 4; 10425 a = pIter->pLeaf->p; 10426 } 10427 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); 10428 pIter->iLeafOffset = iOff; 10429 } 10430 10431 /* 10432 ** Fts5SegIter.iLeafOffset currently points to the first byte of the 10433 ** "nSuffix" field of a term. Function parameter nKeep contains the value 10434 ** of the "nPrefix" field (if there was one - it is passed 0 if this is 10435 ** the first term in the segment). 10436 ** 10437 ** This function populates: 10438 ** 10439 ** Fts5SegIter.term 10440 ** Fts5SegIter.rowid 10441 ** 10442 ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of 10443 ** the first position list. The position list belonging to document 10444 ** (Fts5SegIter.iRowid). 
10445 */ 10446 static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ 10447 u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ 10448 i64 iOff = pIter->iLeafOffset; /* Offset to read at */ 10449 int nNew; /* Bytes of new data */ 10450 10451 iOff += fts5GetVarint32(&a[iOff], nNew); 10452 if( iOff+nNew>pIter->pLeaf->szLeaf || nKeep>pIter->term.n || nNew==0 ){ 10453 p->rc = FTS5_CORRUPT; 10454 return; 10455 } 10456 pIter->term.n = nKeep; 10457 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); 10458 assert( pIter->term.n<=pIter->term.nSpace ); 10459 iOff += nNew; 10460 pIter->iTermLeafOffset = iOff; 10461 pIter->iTermLeafPgno = pIter->iLeafPgno; 10462 pIter->iLeafOffset = iOff; 10463 10464 if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ 10465 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 10466 }else{ 10467 int nExtra; 10468 pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); 10469 pIter->iEndofDoclist += nExtra; 10470 } 10471 10472 fts5SegIterLoadRowid(p, pIter); 10473 } 10474 10475 static void fts5SegIterNext(Fts5Index*, Fts5SegIter*, int*); 10476 static void fts5SegIterNext_Reverse(Fts5Index*, Fts5SegIter*, int*); 10477 static void fts5SegIterNext_None(Fts5Index*, Fts5SegIter*, int*); 10478 10479 static void fts5SegIterSetNext(Fts5Index *p, Fts5SegIter *pIter){ 10480 if( pIter->flags & FTS5_SEGITER_REVERSE ){ 10481 pIter->xNext = fts5SegIterNext_Reverse; 10482 }else if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ 10483 pIter->xNext = fts5SegIterNext_None; 10484 }else{ 10485 pIter->xNext = fts5SegIterNext; 10486 } 10487 } 10488 10489 /* 10490 ** Initialize the iterator object pIter to iterate through the entries in 10491 ** segment pSeg. The iterator is left pointing to the first entry when 10492 ** this function returns. 10493 ** 10494 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 10495 ** an error has already occurred when this function is called, it is a no-op. 
10496 */ 10497 static void fts5SegIterInit( 10498 Fts5Index *p, /* FTS index object */ 10499 Fts5StructureSegment *pSeg, /* Description of segment */ 10500 Fts5SegIter *pIter /* Object to populate */ 10501 ){ 10502 if( pSeg->pgnoFirst==0 ){ 10503 /* This happens if the segment is being used as an input to an incremental 10504 ** merge and all data has already been "trimmed". See function 10505 ** fts5TrimSegments() for details. In this case leave the iterator empty. 10506 ** The caller will see the (pIter->pLeaf==0) and assume the iterator is 10507 ** at EOF already. */ 10508 assert( pIter->pLeaf==0 ); 10509 return; 10510 } 10511 10512 if( p->rc==SQLITE_OK ){ 10513 memset(pIter, 0, sizeof(*pIter)); 10514 fts5SegIterSetNext(p, pIter); 10515 pIter->pSeg = pSeg; 10516 pIter->iLeafPgno = pSeg->pgnoFirst-1; 10517 fts5SegIterNextPage(p, pIter); 10518 } 10519 10520 if( p->rc==SQLITE_OK ){ 10521 pIter->iLeafOffset = 4; 10522 assert( pIter->pLeaf!=0 ); 10523 assert_nc( pIter->pLeaf->nn>4 ); 10524 assert_nc( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); 10525 pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; 10526 fts5SegIterLoadTerm(p, pIter, 0); 10527 fts5SegIterLoadNPos(p, pIter); 10528 } 10529 } 10530 10531 /* 10532 ** This function is only ever called on iterators created by calls to 10533 ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. 10534 ** 10535 ** The iterator is in an unusual state when this function is called: the 10536 ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of 10537 ** the position-list size field for the first relevant rowid on the page. 10538 ** Fts5SegIter.rowid is set, but nPos and bDel are not. 10539 ** 10540 ** This function advances the iterator so that it points to the last 10541 ** relevant rowid on the page and, if necessary, initializes the 10542 ** aRowidOffset[] and iRowidOffset variables. 
At this point the iterator 10543 ** is in its regular state - Fts5SegIter.iLeafOffset points to the first 10544 ** byte of the position list content associated with said rowid. 10545 */ 10546 static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ 10547 int eDetail = p->pConfig->eDetail; 10548 int n = pIter->pLeaf->szLeaf; 10549 int i = pIter->iLeafOffset; 10550 u8 *a = pIter->pLeaf->p; 10551 int iRowidOffset = 0; 10552 10553 if( n>pIter->iEndofDoclist ){ 10554 n = pIter->iEndofDoclist; 10555 } 10556 10557 ASSERT_SZLEAF_OK(pIter->pLeaf); 10558 while( 1 ){ 10559 u64 iDelta = 0; 10560 10561 if( eDetail==FTS5_DETAIL_NONE ){ 10562 /* todo */ 10563 if( i<n && a[i]==0 ){ 10564 i++; 10565 if( i<n && a[i]==0 ) i++; 10566 } 10567 }else{ 10568 int nPos; 10569 int bDummy; 10570 i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); 10571 i += nPos; 10572 } 10573 if( i>=n ) break; 10574 i += fts5GetVarint(&a[i], &iDelta); 10575 pIter->iRowid += iDelta; 10576 10577 /* If necessary, grow the pIter->aRowidOffset[] array. 
*/ 10578 if( iRowidOffset>=pIter->nRowidOffset ){ 10579 int nNew = pIter->nRowidOffset + 8; 10580 int *aNew = (int*)sqlite3_realloc64(pIter->aRowidOffset,nNew*sizeof(int)); 10581 if( aNew==0 ){ 10582 p->rc = SQLITE_NOMEM; 10583 break; 10584 } 10585 pIter->aRowidOffset = aNew; 10586 pIter->nRowidOffset = nNew; 10587 } 10588 10589 pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; 10590 pIter->iLeafOffset = i; 10591 } 10592 pIter->iRowidOffset = iRowidOffset; 10593 fts5SegIterLoadNPos(p, pIter); 10594 } 10595 10596 /* 10597 ** 10598 */ 10599 static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ 10600 assert( pIter->flags & FTS5_SEGITER_REVERSE ); 10601 assert( pIter->flags & FTS5_SEGITER_ONETERM ); 10602 10603 fts5DataRelease(pIter->pLeaf); 10604 pIter->pLeaf = 0; 10605 while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ 10606 Fts5Data *pNew; 10607 pIter->iLeafPgno--; 10608 pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( 10609 pIter->pSeg->iSegid, pIter->iLeafPgno 10610 )); 10611 if( pNew ){ 10612 /* iTermLeafOffset may be equal to szLeaf if the term is the last 10613 ** thing on the page - i.e. the first rowid is on the following page. 10614 ** In this case leave pIter->pLeaf==0, this iterator is at EOF. 
*/ 10615 if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ 10616 assert( pIter->pLeaf==0 ); 10617 if( pIter->iTermLeafOffset<pNew->szLeaf ){ 10618 pIter->pLeaf = pNew; 10619 pIter->iLeafOffset = pIter->iTermLeafOffset; 10620 } 10621 }else{ 10622 int iRowidOff; 10623 iRowidOff = fts5LeafFirstRowidOff(pNew); 10624 if( iRowidOff ){ 10625 if( iRowidOff>=pNew->szLeaf ){ 10626 p->rc = FTS5_CORRUPT; 10627 }else{ 10628 pIter->pLeaf = pNew; 10629 pIter->iLeafOffset = iRowidOff; 10630 } 10631 } 10632 } 10633 10634 if( pIter->pLeaf ){ 10635 u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; 10636 pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid); 10637 break; 10638 }else{ 10639 fts5DataRelease(pNew); 10640 } 10641 } 10642 } 10643 10644 if( pIter->pLeaf ){ 10645 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 10646 fts5SegIterReverseInitPage(p, pIter); 10647 } 10648 } 10649 10650 /* 10651 ** Return true if the iterator passed as the second argument currently 10652 ** points to a delete marker. A delete marker is an entry with a 0 byte 10653 ** position-list. 10654 */ 10655 static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5Iter *pIter){ 10656 Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; 10657 return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); 10658 } 10659 10660 /* 10661 ** Advance iterator pIter to the next entry. 10662 ** 10663 ** This version of fts5SegIterNext() is only used by reverse iterators. 
10664 */ 10665 static void fts5SegIterNext_Reverse( 10666 Fts5Index *p, /* FTS5 backend object */ 10667 Fts5SegIter *pIter, /* Iterator to advance */ 10668 int *pbUnused /* Unused */ 10669 ){ 10670 assert( pIter->flags & FTS5_SEGITER_REVERSE ); 10671 assert( pIter->pNextLeaf==0 ); 10672 UNUSED_PARAM(pbUnused); 10673 10674 if( pIter->iRowidOffset>0 ){ 10675 u8 *a = pIter->pLeaf->p; 10676 int iOff; 10677 u64 iDelta; 10678 10679 pIter->iRowidOffset--; 10680 pIter->iLeafOffset = pIter->aRowidOffset[pIter->iRowidOffset]; 10681 fts5SegIterLoadNPos(p, pIter); 10682 iOff = pIter->iLeafOffset; 10683 if( p->pConfig->eDetail!=FTS5_DETAIL_NONE ){ 10684 iOff += pIter->nPos; 10685 } 10686 fts5GetVarint(&a[iOff], &iDelta); 10687 pIter->iRowid -= iDelta; 10688 }else{ 10689 fts5SegIterReverseNewPage(p, pIter); 10690 } 10691 } 10692 10693 /* 10694 ** Advance iterator pIter to the next entry. 10695 ** 10696 ** This version of fts5SegIterNext() is only used if detail=none and the 10697 ** iterator is not a reverse direction iterator. 
10698 */ 10699 static void fts5SegIterNext_None( 10700 Fts5Index *p, /* FTS5 backend object */ 10701 Fts5SegIter *pIter, /* Iterator to advance */ 10702 int *pbNewTerm /* OUT: Set for new term */ 10703 ){ 10704 int iOff; 10705 10706 assert( p->rc==SQLITE_OK ); 10707 assert( (pIter->flags & FTS5_SEGITER_REVERSE)==0 ); 10708 assert( p->pConfig->eDetail==FTS5_DETAIL_NONE ); 10709 10710 ASSERT_SZLEAF_OK(pIter->pLeaf); 10711 iOff = pIter->iLeafOffset; 10712 10713 /* Next entry is on the next page */ 10714 if( pIter->pSeg && iOff>=pIter->pLeaf->szLeaf ){ 10715 fts5SegIterNextPage(p, pIter); 10716 if( p->rc || pIter->pLeaf==0 ) return; 10717 pIter->iRowid = 0; 10718 iOff = 4; 10719 } 10720 10721 if( iOff<pIter->iEndofDoclist ){ 10722 /* Next entry is on the current page */ 10723 i64 iDelta; 10724 iOff += sqlite3Fts5GetVarint(&pIter->pLeaf->p[iOff], (u64*)&iDelta); 10725 pIter->iLeafOffset = iOff; 10726 pIter->iRowid += iDelta; 10727 }else if( (pIter->flags & FTS5_SEGITER_ONETERM)==0 ){ 10728 if( pIter->pSeg ){ 10729 int nKeep = 0; 10730 if( iOff!=fts5LeafFirstTermOff(pIter->pLeaf) ){ 10731 iOff += fts5GetVarint32(&pIter->pLeaf->p[iOff], nKeep); 10732 } 10733 pIter->iLeafOffset = iOff; 10734 fts5SegIterLoadTerm(p, pIter, nKeep); 10735 }else{ 10736 const u8 *pList = 0; 10737 const char *zTerm = 0; 10738 int nList; 10739 sqlite3Fts5HashScanNext(p->pHash); 10740 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); 10741 if( pList==0 ) goto next_none_eof; 10742 pIter->pLeaf->p = (u8*)pList; 10743 pIter->pLeaf->nn = nList; 10744 pIter->pLeaf->szLeaf = nList; 10745 pIter->iEndofDoclist = nList; 10746 sqlite3Fts5BufferSet(&p->rc,&pIter->term, (int)strlen(zTerm), (u8*)zTerm); 10747 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); 10748 } 10749 10750 if( pbNewTerm ) *pbNewTerm = 1; 10751 }else{ 10752 goto next_none_eof; 10753 } 10754 10755 fts5SegIterLoadNPos(p, pIter); 10756 10757 return; 10758 next_none_eof: 10759 fts5DataRelease(pIter->pLeaf); 10760 
pIter->pLeaf = 0; 10761 } 10762 10763 10764 /* 10765 ** Advance iterator pIter to the next entry. 10766 ** 10767 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It 10768 ** is not considered an error if the iterator reaches EOF. If an error has 10769 ** already occurred when this function is called, it is a no-op. 10770 */ 10771 static void fts5SegIterNext( 10772 Fts5Index *p, /* FTS5 backend object */ 10773 Fts5SegIter *pIter, /* Iterator to advance */ 10774 int *pbNewTerm /* OUT: Set for new term */ 10775 ){ 10776 Fts5Data *pLeaf = pIter->pLeaf; 10777 int iOff; 10778 int bNewTerm = 0; 10779 int nKeep = 0; 10780 u8 *a; 10781 int n; 10782 10783 assert( pbNewTerm==0 || *pbNewTerm==0 ); 10784 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); 10785 10786 /* Search for the end of the position list within the current page. */ 10787 a = pLeaf->p; 10788 n = pLeaf->szLeaf; 10789 10790 ASSERT_SZLEAF_OK(pLeaf); 10791 iOff = pIter->iLeafOffset + pIter->nPos; 10792 10793 if( iOff<n ){ 10794 /* The next entry is on the current page. 
*/ 10795 assert_nc( iOff<=pIter->iEndofDoclist ); 10796 if( iOff>=pIter->iEndofDoclist ){ 10797 bNewTerm = 1; 10798 if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ 10799 iOff += fts5GetVarint32(&a[iOff], nKeep); 10800 } 10801 }else{ 10802 u64 iDelta; 10803 iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); 10804 pIter->iRowid += iDelta; 10805 assert_nc( iDelta>0 ); 10806 } 10807 pIter->iLeafOffset = iOff; 10808 10809 }else if( pIter->pSeg==0 ){ 10810 const u8 *pList = 0; 10811 const char *zTerm = 0; 10812 int nList = 0; 10813 assert( (pIter->flags & FTS5_SEGITER_ONETERM) || pbNewTerm ); 10814 if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ 10815 sqlite3Fts5HashScanNext(p->pHash); 10816 sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); 10817 } 10818 if( pList==0 ){ 10819 fts5DataRelease(pIter->pLeaf); 10820 pIter->pLeaf = 0; 10821 }else{ 10822 pIter->pLeaf->p = (u8*)pList; 10823 pIter->pLeaf->nn = nList; 10824 pIter->pLeaf->szLeaf = nList; 10825 pIter->iEndofDoclist = nList+1; 10826 sqlite3Fts5BufferSet(&p->rc, &pIter->term, (int)strlen(zTerm), 10827 (u8*)zTerm); 10828 pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); 10829 *pbNewTerm = 1; 10830 } 10831 }else{ 10832 iOff = 0; 10833 /* Next entry is not on the current page */ 10834 while( iOff==0 ){ 10835 fts5SegIterNextPage(p, pIter); 10836 pLeaf = pIter->pLeaf; 10837 if( pLeaf==0 ) break; 10838 ASSERT_SZLEAF_OK(pLeaf); 10839 if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOff<pLeaf->szLeaf ){ 10840 iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); 10841 pIter->iLeafOffset = iOff; 10842 10843 if( pLeaf->nn>pLeaf->szLeaf ){ 10844 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( 10845 &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist 10846 ); 10847 } 10848 } 10849 else if( pLeaf->nn>pLeaf->szLeaf ){ 10850 pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( 10851 &pLeaf->p[pLeaf->szLeaf], iOff 10852 ); 10853 pIter->iLeafOffset = iOff; 10854 pIter->iEndofDoclist = iOff; 10855 bNewTerm = 
1; 10856 } 10857 assert_nc( iOff<pLeaf->szLeaf ); 10858 if( iOff>pLeaf->szLeaf ){ 10859 p->rc = FTS5_CORRUPT; 10860 return; 10861 } 10862 } 10863 } 10864 10865 /* Check if the iterator is now at EOF. If so, return early. */ 10866 if( pIter->pLeaf ){ 10867 if( bNewTerm ){ 10868 if( pIter->flags & FTS5_SEGITER_ONETERM ){ 10869 fts5DataRelease(pIter->pLeaf); 10870 pIter->pLeaf = 0; 10871 }else{ 10872 fts5SegIterLoadTerm(p, pIter, nKeep); 10873 fts5SegIterLoadNPos(p, pIter); 10874 if( pbNewTerm ) *pbNewTerm = 1; 10875 } 10876 }else{ 10877 /* The following could be done by calling fts5SegIterLoadNPos(). But 10878 ** this block is particularly performance critical, so equivalent 10879 ** code is inlined. */ 10880 int nSz; 10881 assert_nc( pIter->iLeafOffset<=pIter->pLeaf->nn ); 10882 fts5FastGetVarint32(pIter->pLeaf->p, pIter->iLeafOffset, nSz); 10883 pIter->bDel = (nSz & 0x0001); 10884 pIter->nPos = nSz>>1; 10885 assert_nc( pIter->nPos>=0 ); 10886 } 10887 } 10888 } 10889 10890 #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } 10891 10892 #define fts5IndexSkipVarint(a, iOff) { \ 10893 int iEnd = iOff+9; \ 10894 while( (a[iOff++] & 0x80) && iOff<iEnd ); \ 10895 } 10896 10897 /* 10898 ** Iterator pIter currently points to the first rowid in a doclist. This 10899 ** function sets the iterator up so that iterates in reverse order through 10900 ** the doclist. 10901 */ 10902 static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ 10903 Fts5DlidxIter *pDlidx = pIter->pDlidx; 10904 Fts5Data *pLast = 0; 10905 int pgnoLast = 0; 10906 10907 if( pDlidx ){ 10908 int iSegid = pIter->pSeg->iSegid; 10909 pgnoLast = fts5DlidxIterPgno(pDlidx); 10910 pLast = fts5LeafRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); 10911 }else{ 10912 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ 10913 10914 /* Currently, Fts5SegIter.iLeafOffset points to the first byte of 10915 ** position-list content for the current rowid. 
Back it up so that it 10916 ** points to the start of the position-list size field. */ 10917 int iPoslist; 10918 if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ 10919 iPoslist = pIter->iTermLeafOffset; 10920 }else{ 10921 iPoslist = 4; 10922 } 10923 fts5IndexSkipVarint(pLeaf->p, iPoslist); 10924 pIter->iLeafOffset = iPoslist; 10925 10926 /* If this condition is true then the largest rowid for the current 10927 ** term may not be stored on the current page. So search forward to 10928 ** see where said rowid really is. */ 10929 if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ 10930 int pgno; 10931 Fts5StructureSegment *pSeg = pIter->pSeg; 10932 10933 /* The last rowid in the doclist may not be on the current page. Search 10934 ** forward to find the page containing the last rowid. */ 10935 for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ 10936 i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); 10937 Fts5Data *pNew = fts5LeafRead(p, iAbs); 10938 if( pNew ){ 10939 int iRowid, bTermless; 10940 iRowid = fts5LeafFirstRowidOff(pNew); 10941 bTermless = fts5LeafIsTermless(pNew); 10942 if( iRowid ){ 10943 SWAPVAL(Fts5Data*, pNew, pLast); 10944 pgnoLast = pgno; 10945 } 10946 fts5DataRelease(pNew); 10947 if( bTermless==0 ) break; 10948 } 10949 } 10950 } 10951 } 10952 10953 /* If pLast is NULL at this point, then the last rowid for this doclist 10954 ** lies on the page currently indicated by the iterator. In this case 10955 ** pIter->iLeafOffset is already set to point to the position-list size 10956 ** field associated with the first relevant rowid on the page. 10957 ** 10958 ** Or, if pLast is non-NULL, then it is the page that contains the last 10959 ** rowid. In this case configure the iterator so that it points to the 10960 ** first rowid on this page. 
10961 */ 10962 if( pLast ){ 10963 int iOff; 10964 fts5DataRelease(pIter->pLeaf); 10965 pIter->pLeaf = pLast; 10966 pIter->iLeafPgno = pgnoLast; 10967 iOff = fts5LeafFirstRowidOff(pLast); 10968 if( iOff>pLast->szLeaf ){ 10969 p->rc = FTS5_CORRUPT; 10970 return; 10971 } 10972 iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); 10973 pIter->iLeafOffset = iOff; 10974 10975 if( fts5LeafIsTermless(pLast) ){ 10976 pIter->iEndofDoclist = pLast->nn+1; 10977 }else{ 10978 pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); 10979 } 10980 } 10981 10982 fts5SegIterReverseInitPage(p, pIter); 10983 } 10984 10985 /* 10986 ** Iterator pIter currently points to the first rowid of a doclist. 10987 ** There is a doclist-index associated with the final term on the current 10988 ** page. If the current term is the last term on the page, load the 10989 ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). 10990 */ 10991 static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ 10992 int iSeg = pIter->pSeg->iSegid; 10993 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); 10994 Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ 10995 10996 assert( pIter->flags & FTS5_SEGITER_ONETERM ); 10997 assert( pIter->pDlidx==0 ); 10998 10999 /* Check if the current doclist ends on this page. If it does, return 11000 ** early without loading the doclist-index (as it belongs to a different 11001 ** term. */ 11002 if( pIter->iTermLeafPgno==pIter->iLeafPgno 11003 && pIter->iEndofDoclist<pLeaf->szLeaf 11004 ){ 11005 return; 11006 } 11007 11008 pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); 11009 } 11010 11011 /* 11012 ** The iterator object passed as the second argument currently contains 11013 ** no valid values except for the Fts5SegIter.pLeaf member variable. This 11014 ** function searches the leaf page for a term matching (pTerm/nTerm). 
11015 ** 11016 ** If the specified term is found on the page, then the iterator is left 11017 ** pointing to it. If argument bGe is zero and the term is not found, 11018 ** the iterator is left pointing at EOF. 11019 ** 11020 ** If bGe is non-zero and the specified term is not found, then the 11021 ** iterator is left pointing to the smallest term in the segment that 11022 ** is larger than the specified term, even if this term is not on the 11023 ** current page. 11024 */ 11025 static void fts5LeafSeek( 11026 Fts5Index *p, /* Leave any error code here */ 11027 int bGe, /* True for a >= search */ 11028 Fts5SegIter *pIter, /* Iterator to seek */ 11029 const u8 *pTerm, int nTerm /* Term to search for */ 11030 ){ 11031 u32 iOff; 11032 const u8 *a = pIter->pLeaf->p; 11033 u32 n = (u32)pIter->pLeaf->nn; 11034 11035 u32 nMatch = 0; 11036 u32 nKeep = 0; 11037 u32 nNew = 0; 11038 u32 iTermOff; 11039 u32 iPgidx; /* Current offset in pgidx */ 11040 int bEndOfPage = 0; 11041 11042 assert( p->rc==SQLITE_OK ); 11043 11044 iPgidx = (u32)pIter->pLeaf->szLeaf; 11045 iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); 11046 iOff = iTermOff; 11047 if( iOff>n ){ 11048 p->rc = FTS5_CORRUPT; 11049 return; 11050 } 11051 11052 while( 1 ){ 11053 11054 /* Figure out how many new bytes are in this term */ 11055 fts5FastGetVarint32(a, iOff, nNew); 11056 if( nKeep<nMatch ){ 11057 goto search_failed; 11058 } 11059 11060 assert( nKeep>=nMatch ); 11061 if( nKeep==nMatch ){ 11062 u32 nCmp; 11063 u32 i; 11064 nCmp = (u32)MIN(nNew, nTerm-nMatch); 11065 for(i=0; i<nCmp; i++){ 11066 if( a[iOff+i]!=pTerm[nMatch+i] ) break; 11067 } 11068 nMatch += i; 11069 11070 if( (u32)nTerm==nMatch ){ 11071 if( i==nNew ){ 11072 goto search_success; 11073 }else{ 11074 goto search_failed; 11075 } 11076 }else if( i<nNew && a[iOff+i]>pTerm[nMatch] ){ 11077 goto search_failed; 11078 } 11079 } 11080 11081 if( iPgidx>=n ){ 11082 bEndOfPage = 1; 11083 break; 11084 } 11085 11086 iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); 
11087 iTermOff += nKeep; 11088 iOff = iTermOff; 11089 11090 if( iOff>=n ){ 11091 p->rc = FTS5_CORRUPT; 11092 return; 11093 } 11094 11095 /* Read the nKeep field of the next term. */ 11096 fts5FastGetVarint32(a, iOff, nKeep); 11097 } 11098 11099 search_failed: 11100 if( bGe==0 ){ 11101 fts5DataRelease(pIter->pLeaf); 11102 pIter->pLeaf = 0; 11103 return; 11104 }else if( bEndOfPage ){ 11105 do { 11106 fts5SegIterNextPage(p, pIter); 11107 if( pIter->pLeaf==0 ) return; 11108 a = pIter->pLeaf->p; 11109 if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ 11110 iPgidx = (u32)pIter->pLeaf->szLeaf; 11111 iPgidx += fts5GetVarint32(&pIter->pLeaf->p[iPgidx], iOff); 11112 if( iOff<4 || (i64)iOff>=pIter->pLeaf->szLeaf ){ 11113 p->rc = FTS5_CORRUPT; 11114 return; 11115 }else{ 11116 nKeep = 0; 11117 iTermOff = iOff; 11118 n = (u32)pIter->pLeaf->nn; 11119 iOff += fts5GetVarint32(&a[iOff], nNew); 11120 break; 11121 } 11122 } 11123 }while( 1 ); 11124 } 11125 11126 search_success: 11127 if( (i64)iOff+nNew>n || nNew<1 ){ 11128 p->rc = FTS5_CORRUPT; 11129 return; 11130 } 11131 pIter->iLeafOffset = iOff + nNew; 11132 pIter->iTermLeafOffset = pIter->iLeafOffset; 11133 pIter->iTermLeafPgno = pIter->iLeafPgno; 11134 11135 fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); 11136 fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); 11137 11138 if( iPgidx>=n ){ 11139 pIter->iEndofDoclist = pIter->pLeaf->nn+1; 11140 }else{ 11141 int nExtra; 11142 iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); 11143 pIter->iEndofDoclist = iTermOff + nExtra; 11144 } 11145 pIter->iPgidxOff = iPgidx; 11146 11147 fts5SegIterLoadRowid(p, pIter); 11148 fts5SegIterLoadNPos(p, pIter); 11149 } 11150 11151 static sqlite3_stmt *fts5IdxSelectStmt(Fts5Index *p){ 11152 if( p->pIdxSelect==0 ){ 11153 Fts5Config *pConfig = p->pConfig; 11154 fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( 11155 "SELECT pgno FROM '%q'.'%q_idx' WHERE " 11156 "segid=? AND term<=? 
ORDER BY term DESC LIMIT 1", 11157 pConfig->zDb, pConfig->zName 11158 )); 11159 } 11160 return p->pIdxSelect; 11161 } 11162 11163 /* 11164 ** Initialize the object pIter to point to term pTerm/nTerm within segment 11165 ** pSeg. If there is no such term in the index, the iterator is set to EOF. 11166 ** 11167 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If 11168 ** an error has already occurred when this function is called, it is a no-op. 11169 */ 11170 static void fts5SegIterSeekInit( 11171 Fts5Index *p, /* FTS5 backend */ 11172 const u8 *pTerm, int nTerm, /* Term to seek to */ 11173 int flags, /* Mask of FTS5INDEX_XXX flags */ 11174 Fts5StructureSegment *pSeg, /* Description of segment */ 11175 Fts5SegIter *pIter /* Object to populate */ 11176 ){ 11177 int iPg = 1; 11178 int bGe = (flags & FTS5INDEX_QUERY_SCAN); 11179 int bDlidx = 0; /* True if there is a doclist-index */ 11180 sqlite3_stmt *pIdxSelect = 0; 11181 11182 assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); 11183 assert( pTerm && nTerm ); 11184 memset(pIter, 0, sizeof(*pIter)); 11185 pIter->pSeg = pSeg; 11186 11187 /* This block sets stack variable iPg to the leaf page number that may 11188 ** contain term (pTerm/nTerm), if it is present in the segment. 
*/ 11189 pIdxSelect = fts5IdxSelectStmt(p); 11190 if( p->rc ) return; 11191 sqlite3_bind_int(pIdxSelect, 1, pSeg->iSegid); 11192 sqlite3_bind_blob(pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); 11193 if( SQLITE_ROW==sqlite3_step(pIdxSelect) ){ 11194 i64 val = sqlite3_column_int(pIdxSelect, 0); 11195 iPg = (int)(val>>1); 11196 bDlidx = (val & 0x0001); 11197 } 11198 p->rc = sqlite3_reset(pIdxSelect); 11199 sqlite3_bind_null(pIdxSelect, 2); 11200 11201 if( iPg<pSeg->pgnoFirst ){ 11202 iPg = pSeg->pgnoFirst; 11203 bDlidx = 0; 11204 } 11205 11206 pIter->iLeafPgno = iPg - 1; 11207 fts5SegIterNextPage(p, pIter); 11208 11209 if( pIter->pLeaf ){ 11210 fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); 11211 } 11212 11213 if( p->rc==SQLITE_OK && bGe==0 ){ 11214 pIter->flags |= FTS5_SEGITER_ONETERM; 11215 if( pIter->pLeaf ){ 11216 if( flags & FTS5INDEX_QUERY_DESC ){ 11217 pIter->flags |= FTS5_SEGITER_REVERSE; 11218 } 11219 if( bDlidx ){ 11220 fts5SegIterLoadDlidx(p, pIter); 11221 } 11222 if( flags & FTS5INDEX_QUERY_DESC ){ 11223 fts5SegIterReverse(p, pIter); 11224 } 11225 } 11226 } 11227 11228 fts5SegIterSetNext(p, pIter); 11229 11230 /* Either: 11231 ** 11232 ** 1) an error has occurred, or 11233 ** 2) the iterator points to EOF, or 11234 ** 3) the iterator points to an entry with term (pTerm/nTerm), or 11235 ** 4) the FTS5INDEX_QUERY_SCAN flag was set and the iterator points 11236 ** to an entry with a term greater than or equal to (pTerm/nTerm). 11237 */ 11238 assert_nc( p->rc!=SQLITE_OK /* 1 */ 11239 || pIter->pLeaf==0 /* 2 */ 11240 || fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)==0 /* 3 */ 11241 || (bGe && fts5BufferCompareBlob(&pIter->term, pTerm, nTerm)>0) /* 4 */ 11242 ); 11243 } 11244 11245 /* 11246 ** Initialize the object pIter to point to term pTerm/nTerm within the 11247 ** in-memory hash table. If there is no such term in the hash-table, the 11248 ** iterator is set to EOF. 11249 ** 11250 ** If an error occurs, Fts5Index.rc is set to an appropriate error code. 
If 11251 ** an error has already occurred when this function is called, it is a no-op. 11252 */ 11253 static void fts5SegIterHashInit( 11254 Fts5Index *p, /* FTS5 backend */ 11255 const u8 *pTerm, int nTerm, /* Term to seek to */ 11256 int flags, /* Mask of FTS5INDEX_XXX flags */ 11257 Fts5SegIter *pIter /* Object to populate */ 11258 ){ 11259 int nList = 0; 11260 const u8 *z = 0; 11261 int n = 0; 11262 Fts5Data *pLeaf = 0; 11263 11264 assert( p->pHash ); 11265 assert( p->rc==SQLITE_OK ); 11266 11267 if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ 11268 const u8 *pList = 0; 11269 11270 p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); 11271 sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); 11272 n = (z ? (int)strlen((const char*)z) : 0); 11273 if( pList ){ 11274 pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); 11275 if( pLeaf ){ 11276 pLeaf->p = (u8*)pList; 11277 } 11278 } 11279 }else{ 11280 p->rc = sqlite3Fts5HashQuery(p->pHash, sizeof(Fts5Data), 11281 (const char*)pTerm, nTerm, (void**)&pLeaf, &nList 11282 ); 11283 if( pLeaf ){ 11284 pLeaf->p = (u8*)&pLeaf[1]; 11285 } 11286 z = pTerm; 11287 n = nTerm; 11288 pIter->flags |= FTS5_SEGITER_ONETERM; 11289 } 11290 11291 if( pLeaf ){ 11292 sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); 11293 pLeaf->nn = pLeaf->szLeaf = nList; 11294 pIter->pLeaf = pLeaf; 11295 pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); 11296 pIter->iEndofDoclist = pLeaf->nn; 11297 11298 if( flags & FTS5INDEX_QUERY_DESC ){ 11299 pIter->flags |= FTS5_SEGITER_REVERSE; 11300 fts5SegIterReverseInitPage(p, pIter); 11301 }else{ 11302 fts5SegIterLoadNPos(p, pIter); 11303 } 11304 } 11305 11306 fts5SegIterSetNext(p, pIter); 11307 } 11308 11309 /* 11310 ** Zero the iterator passed as the only argument. 
11311 */ 11312 static void fts5SegIterClear(Fts5SegIter *pIter){ 11313 fts5BufferFree(&pIter->term); 11314 fts5DataRelease(pIter->pLeaf); 11315 fts5DataRelease(pIter->pNextLeaf); 11316 fts5DlidxIterFree(pIter->pDlidx); 11317 sqlite3_free(pIter->aRowidOffset); 11318 memset(pIter, 0, sizeof(Fts5SegIter)); 11319 } 11320 11321 #ifdef SQLITE_DEBUG 11322 11323 /* 11324 ** This function is used as part of the big assert() procedure implemented by 11325 ** fts5AssertMultiIterSetup(). It ensures that the result currently stored 11326 ** in *pRes is the correct result of comparing the current positions of the 11327 ** two iterators. 11328 */ 11329 static void fts5AssertComparisonResult( 11330 Fts5Iter *pIter, 11331 Fts5SegIter *p1, 11332 Fts5SegIter *p2, 11333 Fts5CResult *pRes 11334 ){ 11335 int i1 = p1 - pIter->aSeg; 11336 int i2 = p2 - pIter->aSeg; 11337 11338 if( p1->pLeaf || p2->pLeaf ){ 11339 if( p1->pLeaf==0 ){ 11340 assert( pRes->iFirst==i2 ); 11341 }else if( p2->pLeaf==0 ){ 11342 assert( pRes->iFirst==i1 ); 11343 }else{ 11344 int nMin = MIN(p1->term.n, p2->term.n); 11345 int res = fts5Memcmp(p1->term.p, p2->term.p, nMin); 11346 if( res==0 ) res = p1->term.n - p2->term.n; 11347 11348 if( res==0 ){ 11349 assert( pRes->bTermEq==1 ); 11350 assert( p1->iRowid!=p2->iRowid ); 11351 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; 11352 }else{ 11353 assert( pRes->bTermEq==0 ); 11354 } 11355 11356 if( res<0 ){ 11357 assert( pRes->iFirst==i1 ); 11358 }else{ 11359 assert( pRes->iFirst==i2 ); 11360 } 11361 } 11362 } 11363 } 11364 11365 /* 11366 ** This function is a no-op unless SQLITE_DEBUG is defined when this module 11367 ** is compiled. In that case, this function is essentially an assert() 11368 ** statement used to verify that the contents of the pIter->aFirst[] array 11369 ** are correct. 
11370 */ 11371 static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ 11372 if( p->rc==SQLITE_OK ){ 11373 Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 11374 int i; 11375 11376 assert( (pFirst->pLeaf==0)==pIter->base.bEof ); 11377 11378 /* Check that pIter->iSwitchRowid is set correctly. */ 11379 for(i=0; i<pIter->nSeg; i++){ 11380 Fts5SegIter *p1 = &pIter->aSeg[i]; 11381 assert( p1==pFirst 11382 || p1->pLeaf==0 11383 || fts5BufferCompare(&pFirst->term, &p1->term) 11384 || p1->iRowid==pIter->iSwitchRowid 11385 || (p1->iRowid<pIter->iSwitchRowid)==pIter->bRev 11386 ); 11387 } 11388 11389 for(i=0; i<pIter->nSeg; i+=2){ 11390 Fts5SegIter *p1 = &pIter->aSeg[i]; 11391 Fts5SegIter *p2 = &pIter->aSeg[i+1]; 11392 Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; 11393 fts5AssertComparisonResult(pIter, p1, p2, pRes); 11394 } 11395 11396 for(i=1; i<(pIter->nSeg / 2); i+=2){ 11397 Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; 11398 Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; 11399 Fts5CResult *pRes = &pIter->aFirst[i]; 11400 fts5AssertComparisonResult(pIter, p1, p2, pRes); 11401 } 11402 } 11403 } 11404 #else 11405 # define fts5AssertMultiIterSetup(x,y) 11406 #endif 11407 11408 /* 11409 ** Do the comparison necessary to populate pIter->aFirst[iOut]. 11410 ** 11411 ** If the returned value is non-zero, then it is the index of an entry 11412 ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing 11413 ** to a key that is a duplicate of another, higher priority, 11414 ** segment-iterator in the pSeg->aSeg[] array. 
11415 */ 11416 static int fts5MultiIterDoCompare(Fts5Iter *pIter, int iOut){ 11417 int i1; /* Index of left-hand Fts5SegIter */ 11418 int i2; /* Index of right-hand Fts5SegIter */ 11419 int iRes; 11420 Fts5SegIter *p1; /* Left-hand Fts5SegIter */ 11421 Fts5SegIter *p2; /* Right-hand Fts5SegIter */ 11422 Fts5CResult *pRes = &pIter->aFirst[iOut]; 11423 11424 assert( iOut<pIter->nSeg && iOut>0 ); 11425 assert( pIter->bRev==0 || pIter->bRev==1 ); 11426 11427 if( iOut>=(pIter->nSeg/2) ){ 11428 i1 = (iOut - pIter->nSeg/2) * 2; 11429 i2 = i1 + 1; 11430 }else{ 11431 i1 = pIter->aFirst[iOut*2].iFirst; 11432 i2 = pIter->aFirst[iOut*2+1].iFirst; 11433 } 11434 p1 = &pIter->aSeg[i1]; 11435 p2 = &pIter->aSeg[i2]; 11436 11437 pRes->bTermEq = 0; 11438 if( p1->pLeaf==0 ){ /* If p1 is at EOF */ 11439 iRes = i2; 11440 }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ 11441 iRes = i1; 11442 }else{ 11443 int res = fts5BufferCompare(&p1->term, &p2->term); 11444 if( res==0 ){ 11445 assert_nc( i2>i1 ); 11446 assert_nc( i2!=0 ); 11447 pRes->bTermEq = 1; 11448 if( p1->iRowid==p2->iRowid ){ 11449 p1->bDel = p2->bDel; 11450 return i2; 11451 } 11452 res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; 11453 } 11454 assert( res!=0 ); 11455 if( res<0 ){ 11456 iRes = i1; 11457 }else{ 11458 iRes = i2; 11459 } 11460 } 11461 11462 pRes->iFirst = (u16)iRes; 11463 return 0; 11464 } 11465 11466 /* 11467 ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. 11468 ** It is an error if leaf iLeafPgno does not exist or contains no rowids. 
11469 */ 11470 static void fts5SegIterGotoPage( 11471 Fts5Index *p, /* FTS5 backend object */ 11472 Fts5SegIter *pIter, /* Iterator to advance */ 11473 int iLeafPgno 11474 ){ 11475 assert( iLeafPgno>pIter->iLeafPgno ); 11476 11477 if( iLeafPgno>pIter->pSeg->pgnoLast ){ 11478 p->rc = FTS5_CORRUPT; 11479 }else{ 11480 fts5DataRelease(pIter->pNextLeaf); 11481 pIter->pNextLeaf = 0; 11482 pIter->iLeafPgno = iLeafPgno-1; 11483 fts5SegIterNextPage(p, pIter); 11484 assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); 11485 11486 if( p->rc==SQLITE_OK && ALWAYS(pIter->pLeaf!=0) ){ 11487 int iOff; 11488 u8 *a = pIter->pLeaf->p; 11489 int n = pIter->pLeaf->szLeaf; 11490 11491 iOff = fts5LeafFirstRowidOff(pIter->pLeaf); 11492 if( iOff<4 || iOff>=n ){ 11493 p->rc = FTS5_CORRUPT; 11494 }else{ 11495 iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); 11496 pIter->iLeafOffset = iOff; 11497 fts5SegIterLoadNPos(p, pIter); 11498 } 11499 } 11500 } 11501 } 11502 11503 /* 11504 ** Advance the iterator passed as the second argument until it is at or 11505 ** past rowid iFrom. Regardless of the value of iFrom, the iterator is 11506 ** always advanced at least once. 
11507 */ 11508 static void fts5SegIterNextFrom( 11509 Fts5Index *p, /* FTS5 backend object */ 11510 Fts5SegIter *pIter, /* Iterator to advance */ 11511 i64 iMatch /* Advance iterator at least this far */ 11512 ){ 11513 int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); 11514 Fts5DlidxIter *pDlidx = pIter->pDlidx; 11515 int iLeafPgno = pIter->iLeafPgno; 11516 int bMove = 1; 11517 11518 assert( pIter->flags & FTS5_SEGITER_ONETERM ); 11519 assert( pIter->pDlidx ); 11520 assert( pIter->pLeaf ); 11521 11522 if( bRev==0 ){ 11523 while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ 11524 iLeafPgno = fts5DlidxIterPgno(pDlidx); 11525 fts5DlidxIterNext(p, pDlidx); 11526 } 11527 assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); 11528 if( iLeafPgno>pIter->iLeafPgno ){ 11529 fts5SegIterGotoPage(p, pIter, iLeafPgno); 11530 bMove = 0; 11531 } 11532 }else{ 11533 assert( pIter->pNextLeaf==0 ); 11534 assert( iMatch<pIter->iRowid ); 11535 while( !fts5DlidxIterEof(p, pDlidx) && iMatch<fts5DlidxIterRowid(pDlidx) ){ 11536 fts5DlidxIterPrev(p, pDlidx); 11537 } 11538 iLeafPgno = fts5DlidxIterPgno(pDlidx); 11539 11540 assert( fts5DlidxIterEof(p, pDlidx) || iLeafPgno<=pIter->iLeafPgno ); 11541 11542 if( iLeafPgno<pIter->iLeafPgno ){ 11543 pIter->iLeafPgno = iLeafPgno+1; 11544 fts5SegIterReverseNewPage(p, pIter); 11545 bMove = 0; 11546 } 11547 } 11548 11549 do{ 11550 if( bMove && p->rc==SQLITE_OK ) pIter->xNext(p, pIter, 0); 11551 if( pIter->pLeaf==0 ) break; 11552 if( bRev==0 && pIter->iRowid>=iMatch ) break; 11553 if( bRev!=0 && pIter->iRowid<=iMatch ) break; 11554 bMove = 1; 11555 }while( p->rc==SQLITE_OK ); 11556 } 11557 11558 11559 /* 11560 ** Free the iterator object passed as the second argument. 
11561 */ 11562 static void fts5MultiIterFree(Fts5Iter *pIter){ 11563 if( pIter ){ 11564 int i; 11565 for(i=0; i<pIter->nSeg; i++){ 11566 fts5SegIterClear(&pIter->aSeg[i]); 11567 } 11568 fts5BufferFree(&pIter->poslist); 11569 sqlite3_free(pIter); 11570 } 11571 } 11572 11573 static void fts5MultiIterAdvanced( 11574 Fts5Index *p, /* FTS5 backend to iterate within */ 11575 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ 11576 int iChanged, /* Index of sub-iterator just advanced */ 11577 int iMinset /* Minimum entry in aFirst[] to set */ 11578 ){ 11579 int i; 11580 for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ 11581 int iEq; 11582 if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ 11583 Fts5SegIter *pSeg = &pIter->aSeg[iEq]; 11584 assert( p->rc==SQLITE_OK ); 11585 pSeg->xNext(p, pSeg, 0); 11586 i = pIter->nSeg + iEq; 11587 } 11588 } 11589 } 11590 11591 /* 11592 ** Sub-iterator iChanged of iterator pIter has just been advanced. It still 11593 ** points to the same term though - just a different rowid. This function 11594 ** attempts to update the contents of the pIter->aFirst[] accordingly. 11595 ** If it does so successfully, 0 is returned. Otherwise 1. 11596 ** 11597 ** If non-zero is returned, the caller should call fts5MultiIterAdvanced() 11598 ** on the iterator instead. That function does the same as this one, except 11599 ** that it deals with more complicated cases as well. 11600 */ 11601 static int fts5MultiIterAdvanceRowid( 11602 Fts5Iter *pIter, /* Iterator to update aFirst[] array for */ 11603 int iChanged, /* Index of sub-iterator just advanced */ 11604 Fts5SegIter **ppFirst 11605 ){ 11606 Fts5SegIter *pNew = &pIter->aSeg[iChanged]; 11607 11608 if( pNew->iRowid==pIter->iSwitchRowid 11609 || (pNew->iRowid<pIter->iSwitchRowid)==pIter->bRev 11610 ){ 11611 int i; 11612 Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; 11613 pIter->iSwitchRowid = pIter->bRev ? 
SMALLEST_INT64 : LARGEST_INT64; 11614 for(i=(pIter->nSeg+iChanged)/2; 1; i=i/2){ 11615 Fts5CResult *pRes = &pIter->aFirst[i]; 11616 11617 assert( pNew->pLeaf ); 11618 assert( pRes->bTermEq==0 || pOther->pLeaf ); 11619 11620 if( pRes->bTermEq ){ 11621 if( pNew->iRowid==pOther->iRowid ){ 11622 return 1; 11623 }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ 11624 pIter->iSwitchRowid = pOther->iRowid; 11625 pNew = pOther; 11626 }else if( (pOther->iRowid>pIter->iSwitchRowid)==pIter->bRev ){ 11627 pIter->iSwitchRowid = pOther->iRowid; 11628 } 11629 } 11630 pRes->iFirst = (u16)(pNew - pIter->aSeg); 11631 if( i==1 ) break; 11632 11633 pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; 11634 } 11635 } 11636 11637 *ppFirst = pNew; 11638 return 0; 11639 } 11640 11641 /* 11642 ** Set the pIter->bEof variable based on the state of the sub-iterators. 11643 */ 11644 static void fts5MultiIterSetEof(Fts5Iter *pIter){ 11645 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 11646 pIter->base.bEof = pSeg->pLeaf==0; 11647 pIter->iSwitchRowid = pSeg->iRowid; 11648 } 11649 11650 /* 11651 ** Move the iterator to the next entry. 11652 ** 11653 ** If an error occurs, an error code is left in Fts5Index.rc. It is not 11654 ** considered an error if the iterator reaches EOF, or if it is already at 11655 ** EOF when this function is called. 
11656 */ 11657 static void fts5MultiIterNext( 11658 Fts5Index *p, 11659 Fts5Iter *pIter, 11660 int bFrom, /* True if argument iFrom is valid */ 11661 i64 iFrom /* Advance at least as far as this */ 11662 ){ 11663 int bUseFrom = bFrom; 11664 assert( pIter->base.bEof==0 ); 11665 while( p->rc==SQLITE_OK ){ 11666 int iFirst = pIter->aFirst[1].iFirst; 11667 int bNewTerm = 0; 11668 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; 11669 assert( p->rc==SQLITE_OK ); 11670 if( bUseFrom && pSeg->pDlidx ){ 11671 fts5SegIterNextFrom(p, pSeg, iFrom); 11672 }else{ 11673 pSeg->xNext(p, pSeg, &bNewTerm); 11674 } 11675 11676 if( pSeg->pLeaf==0 || bNewTerm 11677 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) 11678 ){ 11679 fts5MultiIterAdvanced(p, pIter, iFirst, 1); 11680 fts5MultiIterSetEof(pIter); 11681 pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; 11682 if( pSeg->pLeaf==0 ) return; 11683 } 11684 11685 fts5AssertMultiIterSetup(p, pIter); 11686 assert( pSeg==&pIter->aSeg[pIter->aFirst[1].iFirst] && pSeg->pLeaf ); 11687 if( pIter->bSkipEmpty==0 || pSeg->nPos ){ 11688 pIter->xSetOutputs(pIter, pSeg); 11689 return; 11690 } 11691 bUseFrom = 0; 11692 } 11693 } 11694 11695 static void fts5MultiIterNext2( 11696 Fts5Index *p, 11697 Fts5Iter *pIter, 11698 int *pbNewTerm /* OUT: True if *might* be new term */ 11699 ){ 11700 assert( pIter->bSkipEmpty ); 11701 if( p->rc==SQLITE_OK ){ 11702 *pbNewTerm = 0; 11703 do{ 11704 int iFirst = pIter->aFirst[1].iFirst; 11705 Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; 11706 int bNewTerm = 0; 11707 11708 assert( p->rc==SQLITE_OK ); 11709 pSeg->xNext(p, pSeg, &bNewTerm); 11710 if( pSeg->pLeaf==0 || bNewTerm 11711 || fts5MultiIterAdvanceRowid(pIter, iFirst, &pSeg) 11712 ){ 11713 fts5MultiIterAdvanced(p, pIter, iFirst, 1); 11714 fts5MultiIterSetEof(pIter); 11715 *pbNewTerm = 1; 11716 } 11717 fts5AssertMultiIterSetup(p, pIter); 11718 11719 }while( fts5MultiIterIsEmpty(p, pIter) ); 11720 } 11721 } 11722 11723 static void fts5IterSetOutputs_Noop(Fts5Iter *pUnused1, 
Fts5SegIter *pUnused2){ 11724 UNUSED_PARAM2(pUnused1, pUnused2); 11725 } 11726 11727 static Fts5Iter *fts5MultiIterAlloc( 11728 Fts5Index *p, /* FTS5 backend to iterate within */ 11729 int nSeg 11730 ){ 11731 Fts5Iter *pNew; 11732 int nSlot; /* Power of two >= nSeg */ 11733 11734 for(nSlot=2; nSlot<nSeg; nSlot=nSlot*2); 11735 pNew = fts5IdxMalloc(p, 11736 sizeof(Fts5Iter) + /* pNew */ 11737 sizeof(Fts5SegIter) * (nSlot-1) + /* pNew->aSeg[] */ 11738 sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ 11739 ); 11740 if( pNew ){ 11741 pNew->nSeg = nSlot; 11742 pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; 11743 pNew->pIndex = p; 11744 pNew->xSetOutputs = fts5IterSetOutputs_Noop; 11745 } 11746 return pNew; 11747 } 11748 11749 static void fts5PoslistCallback( 11750 Fts5Index *pUnused, 11751 void *pContext, 11752 const u8 *pChunk, int nChunk 11753 ){ 11754 UNUSED_PARAM(pUnused); 11755 assert_nc( nChunk>=0 ); 11756 if( nChunk>0 ){ 11757 fts5BufferSafeAppendBlob((Fts5Buffer*)pContext, pChunk, nChunk); 11758 } 11759 } 11760 11761 typedef struct PoslistCallbackCtx PoslistCallbackCtx; 11762 struct PoslistCallbackCtx { 11763 Fts5Buffer *pBuf; /* Append to this buffer */ 11764 Fts5Colset *pColset; /* Restrict matches to this column */ 11765 int eState; /* See above */ 11766 }; 11767 11768 typedef struct PoslistOffsetsCtx PoslistOffsetsCtx; 11769 struct PoslistOffsetsCtx { 11770 Fts5Buffer *pBuf; /* Append to this buffer */ 11771 Fts5Colset *pColset; /* Restrict matches to this column */ 11772 int iRead; 11773 int iWrite; 11774 }; 11775 11776 /* 11777 ** TODO: Make this more efficient! 
11778 */ 11779 static int fts5IndexColsetTest(Fts5Colset *pColset, int iCol){ 11780 int i; 11781 for(i=0; i<pColset->nCol; i++){ 11782 if( pColset->aiCol[i]==iCol ) return 1; 11783 } 11784 return 0; 11785 } 11786 11787 static void fts5PoslistOffsetsCallback( 11788 Fts5Index *pUnused, 11789 void *pContext, 11790 const u8 *pChunk, int nChunk 11791 ){ 11792 PoslistOffsetsCtx *pCtx = (PoslistOffsetsCtx*)pContext; 11793 UNUSED_PARAM(pUnused); 11794 assert_nc( nChunk>=0 ); 11795 if( nChunk>0 ){ 11796 int i = 0; 11797 while( i<nChunk ){ 11798 int iVal; 11799 i += fts5GetVarint32(&pChunk[i], iVal); 11800 iVal += pCtx->iRead - 2; 11801 pCtx->iRead = iVal; 11802 if( fts5IndexColsetTest(pCtx->pColset, iVal) ){ 11803 fts5BufferSafeAppendVarint(pCtx->pBuf, iVal + 2 - pCtx->iWrite); 11804 pCtx->iWrite = iVal; 11805 } 11806 } 11807 } 11808 } 11809 11810 static void fts5PoslistFilterCallback( 11811 Fts5Index *pUnused, 11812 void *pContext, 11813 const u8 *pChunk, int nChunk 11814 ){ 11815 PoslistCallbackCtx *pCtx = (PoslistCallbackCtx*)pContext; 11816 UNUSED_PARAM(pUnused); 11817 assert_nc( nChunk>=0 ); 11818 if( nChunk>0 ){ 11819 /* Search through to find the first varint with value 1. This is the 11820 ** start of the next columns hits. 
*/ 11821 int i = 0; 11822 int iStart = 0; 11823 11824 if( pCtx->eState==2 ){ 11825 int iCol; 11826 fts5FastGetVarint32(pChunk, i, iCol); 11827 if( fts5IndexColsetTest(pCtx->pColset, iCol) ){ 11828 pCtx->eState = 1; 11829 fts5BufferSafeAppendVarint(pCtx->pBuf, 1); 11830 }else{ 11831 pCtx->eState = 0; 11832 } 11833 } 11834 11835 do { 11836 while( i<nChunk && pChunk[i]!=0x01 ){ 11837 while( pChunk[i] & 0x80 ) i++; 11838 i++; 11839 } 11840 if( pCtx->eState ){ 11841 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); 11842 } 11843 if( i<nChunk ){ 11844 int iCol; 11845 iStart = i; 11846 i++; 11847 if( i>=nChunk ){ 11848 pCtx->eState = 2; 11849 }else{ 11850 fts5FastGetVarint32(pChunk, i, iCol); 11851 pCtx->eState = fts5IndexColsetTest(pCtx->pColset, iCol); 11852 if( pCtx->eState ){ 11853 fts5BufferSafeAppendBlob(pCtx->pBuf, &pChunk[iStart], i-iStart); 11854 iStart = i; 11855 } 11856 } 11857 } 11858 }while( i<nChunk ); 11859 } 11860 } 11861 11862 static void fts5ChunkIterate( 11863 Fts5Index *p, /* Index object */ 11864 Fts5SegIter *pSeg, /* Poslist of this iterator */ 11865 void *pCtx, /* Context pointer for xChunk callback */ 11866 void (*xChunk)(Fts5Index*, void*, const u8*, int) 11867 ){ 11868 int nRem = pSeg->nPos; /* Number of bytes still to come */ 11869 Fts5Data *pData = 0; 11870 u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; 11871 int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); 11872 int pgno = pSeg->iLeafPgno; 11873 int pgnoSave = 0; 11874 11875 /* This function does not work with detail=none databases. 
*/ 11876 assert( p->pConfig->eDetail!=FTS5_DETAIL_NONE ); 11877 11878 if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ 11879 pgnoSave = pgno+1; 11880 } 11881 11882 while( 1 ){ 11883 xChunk(p, pCtx, pChunk, nChunk); 11884 nRem -= nChunk; 11885 fts5DataRelease(pData); 11886 if( nRem<=0 ){ 11887 break; 11888 }else if( pSeg->pSeg==0 ){ 11889 p->rc = FTS5_CORRUPT; 11890 return; 11891 }else{ 11892 pgno++; 11893 pData = fts5LeafRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); 11894 if( pData==0 ) break; 11895 pChunk = &pData->p[4]; 11896 nChunk = MIN(nRem, pData->szLeaf - 4); 11897 if( pgno==pgnoSave ){ 11898 assert( pSeg->pNextLeaf==0 ); 11899 pSeg->pNextLeaf = pData; 11900 pData = 0; 11901 } 11902 } 11903 } 11904 } 11905 11906 /* 11907 ** Iterator pIter currently points to a valid entry (not EOF). This 11908 ** function appends the position list data for the current entry to 11909 ** buffer pBuf. It does not make a copy of the position-list size 11910 ** field. 11911 */ 11912 static void fts5SegiterPoslist( 11913 Fts5Index *p, 11914 Fts5SegIter *pSeg, 11915 Fts5Colset *pColset, 11916 Fts5Buffer *pBuf 11917 ){ 11918 assert( pBuf!=0 ); 11919 assert( pSeg!=0 ); 11920 if( 0==fts5BufferGrow(&p->rc, pBuf, pSeg->nPos+FTS5_DATA_ZERO_PADDING) ){ 11921 assert( pBuf->p!=0 ); 11922 assert( pBuf->nSpace >= pBuf->n+pSeg->nPos+FTS5_DATA_ZERO_PADDING ); 11923 memset(&pBuf->p[pBuf->n+pSeg->nPos], 0, FTS5_DATA_ZERO_PADDING); 11924 if( pColset==0 ){ 11925 fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); 11926 }else{ 11927 if( p->pConfig->eDetail==FTS5_DETAIL_FULL ){ 11928 PoslistCallbackCtx sCtx; 11929 sCtx.pBuf = pBuf; 11930 sCtx.pColset = pColset; 11931 sCtx.eState = fts5IndexColsetTest(pColset, 0); 11932 assert( sCtx.eState==0 || sCtx.eState==1 ); 11933 fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistFilterCallback); 11934 }else{ 11935 PoslistOffsetsCtx sCtx; 11936 memset(&sCtx, 0, sizeof(sCtx)); 11937 sCtx.pBuf = pBuf; 11938 sCtx.pColset = pColset; 11939 
fts5ChunkIterate(p, pSeg, (void*)&sCtx, fts5PoslistOffsetsCallback); 11940 } 11941 } 11942 } 11943 } 11944 11945 /* 11946 ** Parameter pPos points to a buffer containing a position list, size nPos. 11947 ** This function filters it according to pColset (which must be non-NULL) 11948 ** and sets pIter->base.pData/nData to point to the new position list. 11949 ** If memory is required for the new position list, use buffer pIter->poslist. 11950 ** Or, if the new position list is a contiguous subset of the input, set 11951 ** pIter->base.pData/nData to point directly to it. 11952 ** 11953 ** This function is a no-op if *pRc is other than SQLITE_OK when it is 11954 ** called. If an OOM error is encountered, *pRc is set to SQLITE_NOMEM 11955 ** before returning. 11956 */ 11957 static void fts5IndexExtractColset( 11958 int *pRc, 11959 Fts5Colset *pColset, /* Colset to filter on */ 11960 const u8 *pPos, int nPos, /* Position list */ 11961 Fts5Iter *pIter 11962 ){ 11963 if( *pRc==SQLITE_OK ){ 11964 const u8 *p = pPos; 11965 const u8 *aCopy = p; 11966 const u8 *pEnd = &p[nPos]; /* One byte past end of position list */ 11967 int i = 0; 11968 int iCurrent = 0; 11969 11970 if( pColset->nCol>1 && sqlite3Fts5BufferSize(pRc, &pIter->poslist, nPos) ){ 11971 return; 11972 } 11973 11974 while( 1 ){ 11975 while( pColset->aiCol[i]<iCurrent ){ 11976 i++; 11977 if( i==pColset->nCol ){ 11978 pIter->base.pData = pIter->poslist.p; 11979 pIter->base.nData = pIter->poslist.n; 11980 return; 11981 } 11982 } 11983 11984 /* Advance pointer p until it points to pEnd or an 0x01 byte that is 11985 ** not part of a varint */ 11986 while( p<pEnd && *p!=0x01 ){ 11987 while( *p++ & 0x80 ); 11988 } 11989 11990 if( pColset->aiCol[i]==iCurrent ){ 11991 if( pColset->nCol==1 ){ 11992 pIter->base.pData = aCopy; 11993 pIter->base.nData = p-aCopy; 11994 return; 11995 } 11996 fts5BufferSafeAppendBlob(&pIter->poslist, aCopy, p-aCopy); 11997 } 11998 if( p>=pEnd ){ 11999 pIter->base.pData = pIter->poslist.p; 12000 
pIter->base.nData = pIter->poslist.n; 12001 return; 12002 } 12003 aCopy = p++; 12004 iCurrent = *p++; 12005 if( iCurrent & 0x80 ){ 12006 p--; 12007 p += fts5GetVarint32(p, iCurrent); 12008 } 12009 } 12010 } 12011 12012 } 12013 12014 /* 12015 ** xSetOutputs callback used by detail=none tables. 12016 */ 12017 static void fts5IterSetOutputs_None(Fts5Iter *pIter, Fts5SegIter *pSeg){ 12018 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_NONE ); 12019 pIter->base.iRowid = pSeg->iRowid; 12020 pIter->base.nData = pSeg->nPos; 12021 } 12022 12023 /* 12024 ** xSetOutputs callback used by detail=full and detail=col tables when no 12025 ** column filters are specified. 12026 */ 12027 static void fts5IterSetOutputs_Nocolset(Fts5Iter *pIter, Fts5SegIter *pSeg){ 12028 pIter->base.iRowid = pSeg->iRowid; 12029 pIter->base.nData = pSeg->nPos; 12030 12031 assert( pIter->pIndex->pConfig->eDetail!=FTS5_DETAIL_NONE ); 12032 assert( pIter->pColset==0 ); 12033 12034 if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){ 12035 /* All data is stored on the current page. Populate the output 12036 ** variables to point into the body of the page object. */ 12037 pIter->base.pData = &pSeg->pLeaf->p[pSeg->iLeafOffset]; 12038 }else{ 12039 /* The data is distributed over two or more pages. Copy it into the 12040 ** Fts5Iter.poslist buffer and then set the output pointer to point 12041 ** to this buffer. */ 12042 fts5BufferZero(&pIter->poslist); 12043 fts5SegiterPoslist(pIter->pIndex, pSeg, 0, &pIter->poslist); 12044 pIter->base.pData = pIter->poslist.p; 12045 } 12046 } 12047 12048 /* 12049 ** xSetOutputs callback used when the Fts5Colset object has nCol==0 (match 12050 ** against no columns at all). 12051 */ 12052 static void fts5IterSetOutputs_ZeroColset(Fts5Iter *pIter, Fts5SegIter *pSeg){ 12053 UNUSED_PARAM(pSeg); 12054 pIter->base.nData = 0; 12055 } 12056 12057 /* 12058 ** xSetOutputs callback used by detail=col when there is a column filter 12059 ** and there are 100 or more columns. 
Also called as a fallback from 12060 ** fts5IterSetOutputs_Col100 if the column-list spans more than one page. 12061 */ 12062 static void fts5IterSetOutputs_Col(Fts5Iter *pIter, Fts5SegIter *pSeg){ 12063 fts5BufferZero(&pIter->poslist); 12064 fts5SegiterPoslist(pIter->pIndex, pSeg, pIter->pColset, &pIter->poslist); 12065 pIter->base.iRowid = pSeg->iRowid; 12066 pIter->base.pData = pIter->poslist.p; 12067 pIter->base.nData = pIter->poslist.n; 12068 } 12069 12070 /* 12071 ** xSetOutputs callback used when: 12072 ** 12073 ** * detail=col, 12074 ** * there is a column filter, and 12075 ** * the table contains 100 or fewer columns. 12076 ** 12077 ** The last point is to ensure all column numbers are stored as 12078 ** single-byte varints. 12079 */ 12080 static void fts5IterSetOutputs_Col100(Fts5Iter *pIter, Fts5SegIter *pSeg){ 12081 12082 assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_COLUMNS ); 12083 assert( pIter->pColset ); 12084 12085 if( pSeg->iLeafOffset+pSeg->nPos>pSeg->pLeaf->szLeaf ){ 12086 fts5IterSetOutputs_Col(pIter, pSeg); 12087 }else{ 12088 u8 *a = (u8*)&pSeg->pLeaf->p[pSeg->iLeafOffset]; 12089 u8 *pEnd = (u8*)&a[pSeg->nPos]; 12090 int iPrev = 0; 12091 int *aiCol = pIter->pColset->aiCol; 12092 int *aiColEnd = &aiCol[pIter->pColset->nCol]; 12093 12094 u8 *aOut = pIter->poslist.p; 12095 int iPrevOut = 0; 12096 12097 pIter->base.iRowid = pSeg->iRowid; 12098 12099 while( a<pEnd ){ 12100 iPrev += (int)a++[0] - 2; 12101 while( *aiCol<iPrev ){ 12102 aiCol++; 12103 if( aiCol==aiColEnd ) goto setoutputs_col_out; 12104 } 12105 if( *aiCol==iPrev ){ 12106 *aOut++ = (u8)((iPrev - iPrevOut) + 2); 12107 iPrevOut = iPrev; 12108 } 12109 } 12110 12111 setoutputs_col_out: 12112 pIter->base.pData = pIter->poslist.p; 12113 pIter->base.nData = aOut - pIter->poslist.p; 12114 } 12115 } 12116 12117 /* 12118 ** xSetOutputs callback used by detail=full when there is a column filter. 
*/
static void fts5IterSetOutputs_Full(Fts5Iter *pIter, Fts5SegIter *pSeg){
  Fts5Colset *pColset = pIter->pColset;
  pIter->base.iRowid = pSeg->iRowid;

  assert( pIter->pIndex->pConfig->eDetail==FTS5_DETAIL_FULL );
  assert( pColset );

  if( pSeg->iLeafOffset+pSeg->nPos<=pSeg->pLeaf->szLeaf ){
    /* All data is stored on the current page. Populate the output 
    ** variables to point into the body of the page object. 
    **
    ** fts5IndexExtractColset() sets base.pData/nData itself, either to a
    ** slice of the page or to pIter->poslist, which is why the buffer is
    ** emptied first. Errors are reported through the index error code. */
    const u8 *a = &pSeg->pLeaf->p[pSeg->iLeafOffset];
    int *pRc = &pIter->pIndex->rc;
    fts5BufferZero(&pIter->poslist);
    fts5IndexExtractColset(pRc, pColset, a, pSeg->nPos, pIter);
  }else{
    /* The data is distributed over two or more pages. Copy it into the
    ** Fts5Iter.poslist buffer and then set the output pointer to point
    ** to this buffer.  */
    fts5BufferZero(&pIter->poslist);
    fts5SegiterPoslist(pIter->pIndex, pSeg, pColset, &pIter->poslist);
    pIter->base.pData = pIter->poslist.p;
    pIter->base.nData = pIter->poslist.n;
  }
}

/*
** Select the xSetOutputs callback for iterator pIter. This function is a
** no-op if *pRc is not SQLITE_OK (pIter may be NULL in that case).
**
** The choice is made in this order:
**
**   detail=none                      -> fts5IterSetOutputs_None
**   no colset (pColset==0)           -> fts5IterSetOutputs_Nocolset
**   empty colset (nCol==0)           -> fts5IterSetOutputs_ZeroColset
**   detail=full                      -> fts5IterSetOutputs_Full
**   detail=col, table nCol<=100      -> fts5IterSetOutputs_Col100
**   detail=col, table nCol>100       -> fts5IterSetOutputs_Col
**
** For the Col100 case the poslist buffer is also sized to pConfig->nCol
** bytes here, because that callback writes into poslist.p directly. A
** failure of that sizing is reported through *pRc.
*/
static void fts5IterSetOutputCb(int *pRc, Fts5Iter *pIter){
  assert( pIter!=0 || (*pRc)!=SQLITE_OK );
  if( *pRc==SQLITE_OK ){
    Fts5Config *pConfig = pIter->pIndex->pConfig;
    if( pConfig->eDetail==FTS5_DETAIL_NONE ){
      pIter->xSetOutputs = fts5IterSetOutputs_None;
    }

    else if( pIter->pColset==0 ){
      pIter->xSetOutputs = fts5IterSetOutputs_Nocolset;
    }

    else if( pIter->pColset->nCol==0 ){
      pIter->xSetOutputs = fts5IterSetOutputs_ZeroColset;
    }

    else if( pConfig->eDetail==FTS5_DETAIL_FULL ){
      pIter->xSetOutputs = fts5IterSetOutputs_Full;
    }

    else{
      assert( pConfig->eDetail==FTS5_DETAIL_COLUMNS );
      if( pConfig->nCol<=100 ){
        pIter->xSetOutputs = fts5IterSetOutputs_Col100;
        sqlite3Fts5BufferSize(pRc, &pIter->poslist, pConfig->nCol);
      }else{
        pIter->xSetOutputs = fts5IterSetOutputs_Col;
      }
    }
  }
}


/*
** Allocate a new Fts5Iter object.
**
** The new object will be used to iterate through data in structure pStruct.
** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel
** is zero or greater, data from the first nSegment segments on level iLevel
** is merged.
**
** The iterator initially points to the first term/rowid entry in the 
** iterated data.
**
** Flag handling visible in this function:
**
**   FTS5INDEX_QUERY_DESC       sets Fts5Iter.bRev
**   FTS5INDEX_QUERY_SKIPEMPTY  sets Fts5Iter.bSkipEmpty
**   FTS5INDEX_QUERY_NOOUTPUT   suppresses the call to fts5IterSetOutputCb()
**
** Errors are reported through p->rc. If an error occurs, or had already
** occurred on entry, *ppOut is set to NULL; otherwise it receives the new
** iterator. A term to seek to may only be supplied when iLevel<0 (see the
** first assert).
*/
static void fts5MultiIterNew(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Structure *pStruct,         /* Structure of specific index */
  int flags,                      /* FTS5INDEX_QUERY_XXX flags */
  Fts5Colset *pColset,            /* Colset to filter on (or NULL) */
  const u8 *pTerm, int nTerm,     /* Term to seek to (or NULL/0) */
  int iLevel,                     /* Level to iterate (-1 for all) */
  int nSegment,                   /* Number of segments to merge (iLevel>=0) */
  Fts5Iter **ppOut                /* New object */
){
  int nSeg = 0;                   /* Number of segment-iters in use */
  int iIter = 0;                  /* Next slot in aSeg[]; later a loop index */
  int iSeg;                       /* Used to iterate through segments */
  Fts5StructureLevel *pLvl;
  Fts5Iter *pNew;

  assert( (pTerm==0 && nTerm==0) || iLevel<0 );

  /* Allocate space for the new multi-seg-iterator. When all levels are
  ** being merged, one extra slot is reserved for the in-memory hash table
  ** if there is one. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) );
      nSeg = pStruct->nSegment;
      nSeg += (p->pHash ? 1 : 0);
    }else{
      nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment);
    }
  }
  *ppOut = pNew = fts5MultiIterAlloc(p, nSeg);
  if( pNew==0 ){
    assert( p->rc!=SQLITE_OK );
    goto fts5MultiIterNew_post_check;
  }
  pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC));
  pNew->bSkipEmpty = (0!=(flags & FTS5INDEX_QUERY_SKIPEMPTY));
  pNew->pColset = pColset;
  if( (flags & FTS5INDEX_QUERY_NOOUTPUT)==0 ){
    fts5IterSetOutputCb(&p->rc, pNew);
  }

  /* Initialize each of the component segment iterators. Within a level,
  ** segments are visited from the highest array index down to 0. */
  if( p->rc==SQLITE_OK ){
    if( iLevel<0 ){
      Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel];
      if( p->pHash ){
        /* Add a segment iterator for the current contents of the hash table. */
        Fts5SegIter *pIter = &pNew->aSeg[iIter++];
        fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter);
      }
      for(pLvl=&pStruct->aLevel[0]; pLvl<pEnd; pLvl++){
        for(iSeg=pLvl->nSeg-1; iSeg>=0; iSeg--){
          Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg];
          Fts5SegIter *pIter = &pNew->aSeg[iIter++];
          if( pTerm==0 ){
            fts5SegIterInit(p, pSeg, pIter);
          }else{
            fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter);
          }
        }
      }
    }else{
      pLvl = &pStruct->aLevel[iLevel];
      for(iSeg=nSeg-1; iSeg>=0; iSeg--){
        fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]);
      }
    }
    assert( iIter==nSeg );
  }

  /* If the above was successful, each component iterators now points 
  ** to the first entry in its segment. In this case initialize the 
  ** aFirst[] array. Or, if an error has occurred, free the iterator
  ** object and set the output variable to NULL.  */
  if( p->rc==SQLITE_OK ){
    for(iIter=pNew->nSeg-1; iIter>0; iIter--){
      int iEq;
      /* A non-zero result names a segment iterator (iEq) that is advanced
      ** before the aFirst[] entries are recomputed.
      ** NOTE(review): presumably iEq duplicates the current entry of
      ** another iterator - confirm in fts5MultiIterDoCompare(). */
      if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){
        Fts5SegIter *pSeg = &pNew->aSeg[iEq];
        if( p->rc==SQLITE_OK ) pSeg->xNext(p, pSeg, 0);
        fts5MultiIterAdvanced(p, pNew, iEq, iIter);
      }
    }
    fts5MultiIterSetEof(pNew);
    fts5AssertMultiIterSetup(p, pNew);

    /* Either step past an initial empty entry (SKIPEMPTY), or publish the
    ** outputs for the first entry if there is one. */
    if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){
      fts5MultiIterNext(p, pNew, 0, 0);
    }else if( pNew->base.bEof==0 ){
      Fts5SegIter *pSeg = &pNew->aSeg[pNew->aFirst[1].iFirst];
      pNew->xSetOutputs(pNew, pSeg);
    }

  }else{
    fts5MultiIterFree(pNew);
    *ppOut = 0;
  }

fts5MultiIterNew_post_check:
  assert( (*ppOut)!=0 || p->rc!=SQLITE_OK );
  return;
}

/*
** Create an Fts5Iter that iterates through the doclist provided
** as the second argument.
**
** The iterator has two slots and uses aSeg[1] only. If pData->szLeaf is
** zero the iterator starts at EOF.
**
** Ownership of pData: when the doclist is non-empty and the iterator was
** allocated, pData becomes the iterator's leaf (pIter->pLeaf) and the local
** pointer is cleared before the final fts5DataRelease() call. In every
** other case pData is passed to fts5DataRelease() here.
**
** If fts5MultiIterAlloc() returns NULL, *ppOut is left unmodified.
*/
static void fts5MultiIterNew2(
  Fts5Index *p,                   /* FTS5 backend to iterate within */
  Fts5Data *pData,                /* Doclist to iterate through */
  int bDesc,                      /* True for descending rowid order */
  Fts5Iter **ppOut                /* New object */
){
  Fts5Iter *pNew;
  pNew = fts5MultiIterAlloc(p, 2);
  if( pNew ){
    Fts5SegIter *pIter = &pNew->aSeg[1];

    pIter->flags = FTS5_SEGITER_ONETERM;
    if( pData->szLeaf>0 ){
      /* The doclist starts with a rowid varint; the position data for
      ** that rowid begins at the offset returned by fts5GetVarint(). */
      pIter->pLeaf = pData;
      pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid);
      pIter->iEndofDoclist = pData->nn;
      pNew->aFirst[1].iFirst = 1;
      if( bDesc ){
        pNew->bRev = 1;
        pIter->flags |= FTS5_SEGITER_REVERSE;
        fts5SegIterReverseInitPage(p, pIter);
      }else{
        fts5SegIterLoadNPos(p, pIter);
      }
      pData = 0;                  /* Now owned by the iterator */
    }else{
      pNew->base.bEof = 1;
    }
    fts5SegIterSetNext(p, pIter);

    *ppOut = pNew;
  }

  fts5DataRelease(pData);
}

/*
** Return true if
the iterator is at EOF or if an error has occurred. 12333 ** False otherwise. 12334 */ 12335 static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ 12336 assert( pIter!=0 || p->rc!=SQLITE_OK ); 12337 assert( p->rc!=SQLITE_OK 12338 || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof 12339 ); 12340 return (p->rc || pIter->base.bEof); 12341 } 12342 12343 /* 12344 ** Return the rowid of the entry that the iterator currently points 12345 ** to. If the iterator points to EOF when this function is called the 12346 ** results are undefined. 12347 */ 12348 static i64 fts5MultiIterRowid(Fts5Iter *pIter){ 12349 assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); 12350 return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; 12351 } 12352 12353 /* 12354 ** Move the iterator to the next entry at or following iMatch. 12355 */ 12356 static void fts5MultiIterNextFrom( 12357 Fts5Index *p, 12358 Fts5Iter *pIter, 12359 i64 iMatch 12360 ){ 12361 while( 1 ){ 12362 i64 iRowid; 12363 fts5MultiIterNext(p, pIter, 1, iMatch); 12364 if( fts5MultiIterEof(p, pIter) ) break; 12365 iRowid = fts5MultiIterRowid(pIter); 12366 if( pIter->bRev==0 && iRowid>=iMatch ) break; 12367 if( pIter->bRev!=0 && iRowid<=iMatch ) break; 12368 } 12369 } 12370 12371 /* 12372 ** Return a pointer to a buffer containing the term associated with the 12373 ** entry that the iterator currently points to. 12374 */ 12375 static const u8 *fts5MultiIterTerm(Fts5Iter *pIter, int *pn){ 12376 Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 12377 *pn = p->term.n; 12378 return p->term.p; 12379 } 12380 12381 /* 12382 ** Allocate a new segment-id for the structure pStruct. The new segment 12383 ** id must be between 1 and 65335 inclusive, and must not be used by 12384 ** any currently existing segment. If a free segment id cannot be found, 12385 ** SQLITE_FULL is returned. 12386 ** 12387 ** If an error has already occurred, this function is a no-op. 0 is 12388 ** returned in this case. 
*/
static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){
  int iSegid = 0;                 /* Return value; 0 means "none allocated" */

  if( p->rc==SQLITE_OK ){
    if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){
      p->rc = SQLITE_FULL;
    }else{
      /* FTS5_MAX_SEGMENT is currently defined as 2000. So the following
      ** array is 63 elements, or 252 bytes, in size.  
      **
      ** aUsed[] is a bitmap: bit ((id-1)%32) of word ((id-1)/32) is set
      ** for every segment id currently present in pStruct. */
      u32 aUsed[(FTS5_MAX_SEGMENT+31) / 32];
      int iLvl, iSeg;
      int i;
      u32 mask;
      memset(aUsed, 0, sizeof(aUsed));
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          int iId = pStruct->aLevel[iLvl].aSeg[iSeg].iSegid;
          /* Ids outside 1..FTS5_MAX_SEGMENT are ignored rather than
          ** indexed into the bitmap. */
          if( iId<=FTS5_MAX_SEGMENT && iId>0 ){
            aUsed[(iId-1) / 32] |= (u32)1 << ((iId-1) % 32);
          }
        }
      }

      /* Find the first word with a clear bit, then the lowest clear bit in
      ** that word. Neither loop has an explicit bound.
      ** NOTE(review): termination relies on nSegment<FTS5_MAX_SEGMENT
      ** (checked above) matching the real number of segments, so that a
      ** clear bit exists - confirm where nSegment is maintained. */
      for(i=0; aUsed[i]==0xFFFFFFFF; i++);
      mask = aUsed[i];
      for(iSegid=0; mask & ((u32)1 << iSegid); iSegid++);
      iSegid += 1 + i*32;

#ifdef SQLITE_DEBUG
      /* Debug builds only: check that the chosen id is in range and is not
      ** used by any segment of the structure. */
      for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){
        for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){
          assert_nc( iSegid!=pStruct->aLevel[iLvl].aSeg[iSeg].iSegid );
        }
      }
      assert_nc( iSegid>0 && iSegid<=FTS5_MAX_SEGMENT );

      {
        /* Also run the index-select statement for (iSegid, 0xFFFF) and
        ** assert that it returns no row. Note that this step stores the
        ** result of sqlite3_reset() in p->rc.
        ** NOTE(review): presumably this verifies that no stale index rows
        ** exist for the new id - confirm the SQL behind
        ** fts5IdxSelectStmt(). */
        sqlite3_stmt *pIdxSelect = fts5IdxSelectStmt(p);
        if( p->rc==SQLITE_OK ){
          u8 aBlob[2] = {0xff, 0xff};
          sqlite3_bind_int(pIdxSelect, 1, iSegid);
          sqlite3_bind_blob(pIdxSelect, 2, aBlob, 2, SQLITE_STATIC);
          assert_nc( sqlite3_step(pIdxSelect)!=SQLITE_ROW );
          p->rc = sqlite3_reset(pIdxSelect);
          sqlite3_bind_null(pIdxSelect, 2);
        }
      }
#endif
    }
  }

  return iSegid;
}

/*
** Discard all data currently cached in the hash-tables.
12446 */ 12447 static void fts5IndexDiscardData(Fts5Index *p){ 12448 assert( p->pHash || p->nPendingData==0 ); 12449 if( p->pHash ){ 12450 sqlite3Fts5HashClear(p->pHash); 12451 p->nPendingData = 0; 12452 } 12453 } 12454 12455 /* 12456 ** Return the size of the prefix, in bytes, that buffer 12457 ** (pNew/<length-unknown>) shares with buffer (pOld/nOld). 12458 ** 12459 ** Buffer (pNew/<length-unknown>) is guaranteed to be greater 12460 ** than buffer (pOld/nOld). 12461 */ 12462 static int fts5PrefixCompress(int nOld, const u8 *pOld, const u8 *pNew){ 12463 int i; 12464 for(i=0; i<nOld; i++){ 12465 if( pOld[i]!=pNew[i] ) break; 12466 } 12467 return i; 12468 } 12469 12470 static void fts5WriteDlidxClear( 12471 Fts5Index *p, 12472 Fts5SegWriter *pWriter, 12473 int bFlush /* If true, write dlidx to disk */ 12474 ){ 12475 int i; 12476 assert( bFlush==0 || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); 12477 for(i=0; i<pWriter->nDlidx; i++){ 12478 Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; 12479 if( pDlidx->buf.n==0 ) break; 12480 if( bFlush ){ 12481 assert( pDlidx->pgno!=0 ); 12482 fts5DataWrite(p, 12483 FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), 12484 pDlidx->buf.p, pDlidx->buf.n 12485 ); 12486 } 12487 sqlite3Fts5BufferZero(&pDlidx->buf); 12488 pDlidx->bPrevValid = 0; 12489 } 12490 } 12491 12492 /* 12493 ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. 12494 ** Any new array elements are zeroed before returning. 
12495 */ 12496 static int fts5WriteDlidxGrow( 12497 Fts5Index *p, 12498 Fts5SegWriter *pWriter, 12499 int nLvl 12500 ){ 12501 if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ 12502 Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc64( 12503 pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl 12504 ); 12505 if( aDlidx==0 ){ 12506 p->rc = SQLITE_NOMEM; 12507 }else{ 12508 size_t nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); 12509 memset(&aDlidx[pWriter->nDlidx], 0, nByte); 12510 pWriter->aDlidx = aDlidx; 12511 pWriter->nDlidx = nLvl; 12512 } 12513 } 12514 return p->rc; 12515 } 12516 12517 /* 12518 ** If the current doclist-index accumulating in pWriter->aDlidx[] is large 12519 ** enough, flush it to disk and return 1. Otherwise discard it and return 12520 ** zero. 12521 */ 12522 static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ 12523 int bFlag = 0; 12524 12525 /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written 12526 ** to the database, also write the doclist-index to disk. */ 12527 if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ 12528 bFlag = 1; 12529 } 12530 fts5WriteDlidxClear(p, pWriter, bFlag); 12531 pWriter->nEmpty = 0; 12532 return bFlag; 12533 } 12534 12535 /* 12536 ** This function is called whenever processing of the doclist for the 12537 ** last term on leaf page (pWriter->iBtPage) is completed. 12538 ** 12539 ** The doclist-index for that term is currently stored in-memory within the 12540 ** Fts5SegWriter.aDlidx[] array. If it is large enough, this function 12541 ** writes it out to disk. Or, if it is too small to bother with, discards 12542 ** it. 12543 ** 12544 ** Fts5SegWriter.btterm currently contains the first term on page iBtPage. 
12545 */ 12546 static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ 12547 int bFlag; 12548 12549 assert( pWriter->iBtPage || pWriter->nEmpty==0 ); 12550 if( pWriter->iBtPage==0 ) return; 12551 bFlag = fts5WriteFlushDlidx(p, pWriter); 12552 12553 if( p->rc==SQLITE_OK ){ 12554 const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); 12555 /* The following was already done in fts5WriteInit(): */ 12556 /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ 12557 sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC); 12558 sqlite3_bind_int64(p->pIdxWriter, 3, bFlag + ((i64)pWriter->iBtPage<<1)); 12559 sqlite3_step(p->pIdxWriter); 12560 p->rc = sqlite3_reset(p->pIdxWriter); 12561 sqlite3_bind_null(p->pIdxWriter, 2); 12562 } 12563 pWriter->iBtPage = 0; 12564 } 12565 12566 /* 12567 ** This is called once for each leaf page except the first that contains 12568 ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that 12569 ** is larger than all terms written to earlier leaves, and equal to or 12570 ** smaller than the first term on the new leaf. 12571 ** 12572 ** If an error occurs, an error code is left in Fts5Index.rc. If an error 12573 ** has already occurred when this function is called, it is a no-op. 12574 */ 12575 static void fts5WriteBtreeTerm( 12576 Fts5Index *p, /* FTS5 backend object */ 12577 Fts5SegWriter *pWriter, /* Writer object */ 12578 int nTerm, const u8 *pTerm /* First term on new page */ 12579 ){ 12580 fts5WriteFlushBtree(p, pWriter); 12581 if( p->rc==SQLITE_OK ){ 12582 fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm); 12583 pWriter->iBtPage = pWriter->writer.pgno; 12584 } 12585 } 12586 12587 /* 12588 ** This function is called when flushing a leaf page that contains no 12589 ** terms at all to disk. 
*/
static void fts5WriteBtreeNoTerm(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5SegWriter *pWriter          /* Writer object */
){
  /* If there were no rowids on the leaf page either and the doclist-index
  ** has already been started, append an 0x00 byte to it.  */
  if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0];
    assert( pDlidx->bPrevValid );
    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0);
  }

  /* Increment the "number of sequential leaves without a term" counter. */
  pWriter->nEmpty++;
}

/*
** Return the first rowid stored in doclist-index page buffer pBuf.
**
** The buffer is read as: one byte (skipped), one varint starting at
** offset 1 (decoded and discarded), then the varint that is returned.
** This matches the order in which fts5WriteDlidxAppend() fills an empty
** buffer: a flag, a page number, then the first rowid.
*/
static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){
  i64 iRowid;
  int iOff;

  iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid);
  fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid);
  return iRowid;
}

/*
** Rowid iRowid has just been appended to the current leaf page. It is the
** first on the page. This function appends an appropriate entry to the current
** doclist-index.
**
** The loop starts at level 0 and moves up one level each time the level
** it is looking at is full (buf.n>=pgsz) and has to be written out. It
** stops at the first level that still has room, or when p->rc is set.
*/
static void fts5WriteDlidxAppend(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  i64 iRowid
){
  int i;
  int bDone = 0;

  for(i=0; p->rc==SQLITE_OK && bDone==0; i++){
    i64 iVal;
    Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i];

    if( pDlidx->buf.n>=p->pConfig->pgsz ){
      /* The current doclist-index page is full. Write it to disk and push
      ** a copy of iRowid (which will become the first rowid on the next
      ** doclist-index leaf page) up into the next level of the b-tree 
      ** hierarchy. If the node being flushed is currently the root node,
      ** also push its first rowid upwards. */
      pDlidx->buf.p[0] = 0x01;    /* Not the root node */
      fts5DataWrite(p, 
          FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno),
          pDlidx->buf.p, pDlidx->buf.n
      );
      /* Make sure level i+1 exists. The array may have been reallocated,
      ** so the pointer to level i is refreshed afterwards. */
      fts5WriteDlidxGrow(p, pWriter, i+2);
      pDlidx = &pWriter->aDlidx[i];
      if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){
        i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf);

        /* This was the root node. Push its first rowid up to the new root. */
        pDlidx[1].pgno = pDlidx->pgno;
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno);
        sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst);
        pDlidx[1].bPrevValid = 1;
        pDlidx[1].iPrev = iFirst;
      }

      /* Start a fresh page at this level. */
      sqlite3Fts5BufferZero(&pDlidx->buf);
      pDlidx->bPrevValid = 0;
      pDlidx->pgno++;
    }else{
      bDone = 1;
    }

    if( pDlidx->bPrevValid ){
      /* Not the first entry on the page: store a delta. */
      iVal = iRowid - pDlidx->iPrev;
    }else{
      /* First entry on an empty page. Write the page header first: a flag
      ** (1 if the loop will continue to a higher level, else 0), followed
      ** by a page number. At level 0 that is the current leaf page, at
      ** higher levels the current page of the level below. The rowid
      ** itself is then stored in full. */
      i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
      assert( pDlidx->buf.n==0 );
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
      sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
      iVal = iRowid;
    }

    sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal);
    pDlidx->bPrevValid = 1;
    pDlidx->iPrev = iRowid;
  }
}

/*
** Write the leaf page currently accumulating in pWriter->writer to the
** database and start a new, empty page.
**
** Before writing, the szLeaf header field (bytes 2 and 3) is set to the
** size of the page body, and the page-index (pgidx) is appended if at
** least one term was written to the page. A page without terms is
** reported through fts5WriteBtreeNoTerm() instead.
**
** Afterwards the page buffer holds a zeroed 4-byte header, the page number
** has been incremented and both "first in page" flags are set. Errors are
** left in p->rc by the helpers called here.
*/
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
  static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
  Fts5PageWriter *pPage = &pWriter->writer;
  i64 iRowid;

  assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) );

  /* Set the szLeaf header field. */
  assert( 0==fts5GetU16(&pPage->buf.p[2]) );
  fts5PutU16(&pPage->buf.p[2], (u16)pPage->buf.n);

  if( pWriter->bFirstTermInPage ){
    /* No term was written to this page. */
    assert( pPage->pgidx.n==0 );
    fts5WriteBtreeNoTerm(p, pWriter);
  }else{
    /* Append the pgidx to the page buffer (szLeaf was already set above). */
    fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p);
  }

  /* Write the page out to disk */
  iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno);
  fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);

  /* Initialize the next page. */
  fts5BufferZero(&pPage->buf);
  fts5BufferZero(&pPage->pgidx);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero);
  pPage->iPrevPgidx = 0;
  pPage->pgno++;

  /* Increase the leaves written counter */
  pWriter->nLeafWritten++;

  /* The new leaf holds no terms or rowids */
  pWriter->bFirstTermInPage = 1;
  pWriter->bFirstRowidInPage = 1;
}

/*
** Append term pTerm/nTerm to the segment being written by the writer passed
** as the second argument.
**
** If an error occurs, set the Fts5Index.rc error code. This function must
** not be called with Fts5Index.rc already set (see the first assert).
*/
static void fts5WriteAppendTerm(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  int nTerm, const u8 *pTerm 
){
  int nPrefix;                    /* Bytes of prefix compression for term */
  Fts5PageWriter *pPage = &pWriter->writer;
  Fts5Buffer *pPgidx = &pWriter->writer.pgidx;
  int nMin = MIN(pPage->term.n, nTerm);

  assert( p->rc==SQLITE_OK );
  assert( pPage->buf.n>=4 );
  assert( pPage->buf.n>4 || pWriter->bFirstTermInPage );

  /* If the current leaf page is full, flush it to disk. A page that holds
  ** only its 4-byte header is not flushed; in that case the buffer is just
  ** grown so that the term fits. */
  if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){
    if( pPage->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
      if( p->rc!=SQLITE_OK ) return;
    }
    fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING);
  }
  
  /* TODO1: Updating pgidx here. */
  /* The page-index entry is the offset of this term within the page,
  ** stored as a delta from the previous entry. The varint is written
  ** straight into pPgidx->p without growing the buffer.
  ** NOTE(review): this relies on the pgidx buffer having been sized in
  ** advance (fts5WriteInit() sizes it to pgsz plus padding) - confirm that
  ** this is always sufficient. */
  pPgidx->n += sqlite3Fts5PutVarint(
      &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx
  );
  pPage->iPrevPgidx = pPage->buf.n;
#if 0
  fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n);
  pPgidx->n += 2;
#endif

  if( pWriter->bFirstTermInPage ){
    /* The first term on a page is stored in full. */
    nPrefix = 0;
    if( pPage->pgno!=1 ){
      /* This is the first term on a leaf that is not the leftmost leaf in
      ** the segment b-tree. In this case it is necessary to add a term to
      ** the b-tree hierarchy that is (a) larger than the largest term 
      ** already written to the segment and (b) smaller than or equal to
      ** this term. In other words, a prefix of (pTerm/nTerm) that is one
      ** byte longer than the longest prefix (pTerm/nTerm) shares with the
      ** previous term. 
      **
      ** Usually, the previous term is available in pPage->term. The exception
      ** is if this is the first term written in an incremental-merge step.
      ** In this case the previous term is not available, so just write a
      ** copy of (pTerm/nTerm) into the parent node. This is slightly
      ** inefficient, but still correct.  */
      int n = nTerm;
      if( pPage->term.n ){
        n = 1 + fts5PrefixCompress(nMin, pPage->term.p, pTerm);
      }
      fts5WriteBtreeTerm(p, pWriter, n, pTerm);
      if( p->rc!=SQLITE_OK ) return;
      pPage = &pWriter->writer;
    }
  }else{
    /* Later terms are prefix-compressed against the previous term: the
    ** shared-prefix length is written first. */
    nPrefix = fts5PrefixCompress(nMin, pPage->term.p, pTerm);
    fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix);
  }

  /* Append the number of bytes of new data, then the term data itself
  ** to the page. */
  fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix);
  fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]);

  /* Update the Fts5PageWriter.term field. */
  fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
  pWriter->bFirstTermInPage = 0;

  /* A new doclist starts with this term. */
  pWriter->bFirstRowidInPage = 0;
  pWriter->bFirstRowidInDoclist = 1;

  /* Record the leaf page on which this term's doclist begins. */
  assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) );
  pWriter->aDlidx[0].pgno = pPage->pgno;
}

/*
** Append rowid iRowid to the writers output. This function is a no-op if
** p->rc is already set.
**
** The rowid is written in full if it is the first in its doclist or the
** first on its page, otherwise as a delta from the previous rowid. When
** it is the first rowid on the page, the page header and the
** doclist-index are updated as well.
*/
static void fts5WriteAppendRowid(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,
  i64 iRowid
){
  if( p->rc==SQLITE_OK ){
    Fts5PageWriter *pPage = &pWriter->writer;

    /* Start a new leaf if the current one is already full. */
    if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){
      fts5WriteFlushLeaf(p, pWriter);
    }

    /* If this is to be the first rowid written to the page, set the 
    ** rowid-pointer in the page-header. Also append a value to the dlidx
    ** buffer, in case a doclist-index is required.  */
    if( pWriter->bFirstRowidInPage ){
      fts5PutU16(pPage->buf.p, (u16)pPage->buf.n);
      fts5WriteDlidxAppend(p, pWriter, iRowid);
    }

    /* Write the rowid. The delta is computed in unsigned arithmetic. */
    if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){
      fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid);
    }else{
      assert_nc( p->rc || iRowid>pWriter->iPrevRowid );
      fts5BufferAppendVarint(&p->rc, &pPage->buf, 
          (u64)iRowid - (u64)pWriter->iPrevRowid
      );
    }
    pWriter->iPrevRowid = iRowid;
    pWriter->bFirstRowidInDoclist = 0;
    pWriter->bFirstRowidInPage = 0;
  }
}

/*
** Append the nData bytes at aData to the writers output, spilling onto new
** leaf pages as required.
**
** While the data would fill the current page (page body plus page-index
** plus remaining data >= pgsz), whole varints are copied until at least
** the remaining space has been consumed, and the leaf is then flushed.
** The data is therefore split on varint boundaries only, and a page may
** end up slightly larger than pgsz. Whatever remains afterwards is
** appended to the current page.
**
** NOTE(review): this assumes aData is a sequence of complete varints.
*/
static void fts5WriteAppendPoslistData(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  const u8 *aData, 
  int nData
){
  Fts5PageWriter *pPage = &pWriter->writer;
  const u8 *a = aData;            /* Next byte to copy */
  int n = nData;                  /* Bytes still to copy */
  
  assert( p->pConfig->pgsz>0 );
  while( p->rc==SQLITE_OK 
     && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz 
  ){
    int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n;
    int nCopy = 0;
    /* Step over whole varints until at least nReq bytes are covered. */
    while( nCopy<nReq ){
      i64 dummy;
      nCopy += fts5GetVarint(&a[nCopy], (u64*)&dummy);
    }
    fts5BufferAppendBlob(&p->rc, &pPage->buf, nCopy, a);
    a += nCopy;
    n -= nCopy;
    fts5WriteFlushLeaf(p, pWriter);
  }
  if( n>0 ){
    fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a);
  }
}

/*
** Flush any data cached by the writer object to the database. Free any
** allocations associated with the writer.
*/
static void fts5WriteFinish(
  Fts5Index *p, 
  Fts5SegWriter *pWriter,         /* Writer object */
  int *pnLeaf                     /* OUT: Number of leaf pages in b-tree */
){
  int i;
  Fts5PageWriter *pLeaf = &pWriter->writer;
  if( p->rc==SQLITE_OK ){
    assert( pLeaf->pgno>=1 );
    /* Flush the final leaf unless it holds nothing but its header. */
    if( pLeaf->buf.n>4 ){
      fts5WriteFlushLeaf(p, pWriter);
    }
    /* pgno is the number of the next page to write, hence the -1. Note
    ** that *pnLeaf is only written when no error had occurred on entry. */
    *pnLeaf = pLeaf->pgno-1;
    if( pLeaf->pgno>1 ){
      fts5WriteFlushBtree(p, pWriter);
    }
  }

  /* The buffers are released whether or not an error occurred. */
  fts5BufferFree(&pLeaf->term);
  fts5BufferFree(&pLeaf->buf);
  fts5BufferFree(&pLeaf->pgidx);
  fts5BufferFree(&pWriter->btterm);

  for(i=0; i<pWriter->nDlidx; i++){
    sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
  }
  sqlite3_free(pWriter->aDlidx);
}

/*
** Initialize writer object pWriter to write a segment with id iSegid.
**
** The object is zeroed, one doclist-index level is allocated, and the page
** and page-index buffers are sized to (pgsz + FTS5_DATA_PADDING) bytes.
** The statement used to insert rows into the %_idx table is prepared on
** first use and cached in p->pIdxWriter.
**
** Errors are left in p->rc. The steps guarded by p->rc==SQLITE_OK below
** (writing the 4-byte page header and binding the segment id) are skipped
** once an error has occurred.
*/
static void fts5WriteInit(
  Fts5Index *p, 
  Fts5SegWriter *pWriter, 
  int iSegid
){
  const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING;

  memset(pWriter, 0, sizeof(Fts5SegWriter));
  pWriter->iSegid = iSegid;

  fts5WriteDlidxGrow(p, pWriter, 1);
  pWriter->writer.pgno = 1;
  pWriter->bFirstTermInPage = 1;
  pWriter->iBtPage = 1;

  assert( pWriter->writer.buf.n==0 );
  assert( pWriter->writer.pgidx.n==0 );

  /* Grow the two buffers to pgsz + padding bytes in size. */
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.pgidx, nBuffer);
  sqlite3Fts5BufferSize(&p->rc, &pWriter->writer.buf, nBuffer);

  if( p->pIdxWriter==0 ){
    Fts5Config *pConfig = p->pConfig;
    fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
          "INSERT INTO '%q'.'%q_idx'(segid,term,pgno) VALUES(?,?,?)", 
          pConfig->zDb, pConfig->zName
    ));
  }

  if( p->rc==SQLITE_OK ){
    /* Initialize the 4-byte leaf-page header to 0x00. */
    memset(pWriter->writer.buf.p, 0, 4);
    pWriter->writer.buf.n = 4;

    /* Bind the current output segment id to the index-writer. This is an
    ** optimization over binding the same value over and over as rows are
    ** inserted into %_idx by the current writer.  */
    sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
  }
}

/*
** Iterator pIter was used to iterate through the input segments of an
** incremental merge operation. This function is called if the incremental
** merge step has finished but the input has not been completely exhausted.
**
** Each input segment is handled in one of three ways:
**
**   * Slots without a segment (pSeg->pSeg==0) are skipped.
**
**   * Segments that are fully consumed (pSeg->pLeaf==0) are marked empty.
**
**   * Otherwise the leaf holding the current term is rewritten so that it
**     starts with that term, the earlier pages of the segment are deleted
**     and pgnoFirst is moved forward to that leaf.
**
** Errors are left in p->rc, which also ends the loop.
*/
static void fts5TrimSegments(Fts5Index *p, Fts5Iter *pIter){
  int i;
  Fts5Buffer buf;                 /* Scratch space for the rewritten leaf */
  memset(&buf, 0, sizeof(Fts5Buffer));
  for(i=0; i<pIter->nSeg && p->rc==SQLITE_OK; i++){
    Fts5SegIter *pSeg = &pIter->aSeg[i];
    if( pSeg->pSeg==0 ){
      /* no-op */
    }else if( pSeg->pLeaf==0 ){
      /* All keys from this input segment have been transferred to the output.
      ** Set both the first and last page-numbers to 0 to indicate that the
      ** segment is now empty. */
      pSeg->pSeg->pgnoLast = 0;
      pSeg->pSeg->pgnoFirst = 0;
    }else{
      int iOff = pSeg->iTermLeafOffset;     /* Offset on new first leaf page */
      i64 iLeafRowid;
      Fts5Data *pData;
      int iId = pSeg->pSeg->iSegid;
      u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00};

      iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno);
      pData = fts5LeafRead(p, iLeafRowid);
      if( pData ){
        if( iOff>pData->szLeaf ){
          /* This can occur if the pages that the segments occupy overlap - if
          ** a single page has been assigned to more than one segment. In
          ** this case a prior iteration of this loop may have corrupted the
          ** segment currently being trimmed.  */
          p->rc = FTS5_CORRUPT;
        }else{
          /* Build the new page: a zeroed 4-byte header, the current term
          ** stored in full, then the old page body from iOff onwards. */
          fts5BufferZero(&buf);
          fts5BufferGrow(&p->rc, &buf, pData->nn);
          fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr);
          fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n);
          fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p);
          fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff,&pData->p[iOff]);
          if( p->rc==SQLITE_OK ){
            /* Set the szLeaf field */
            fts5PutU16(&buf.p[2], (u16)buf.n);
          }

          /* Set up the new page-index array. Its first entry is 4, the
          ** offset of the term written directly after the header. If the
          ** iterator is still on this leaf and more terms follow the
          ** current doclist, one further entry is written for the first of
          ** them and the rest of the old page-index is copied across. */
          fts5BufferAppendVarint(&p->rc, &buf, 4);
          if( pSeg->iLeafPgno==pSeg->iTermLeafPgno 
           && pSeg->iEndofDoclist<pData->szLeaf
           && pSeg->iPgidxOff<=pData->nn
          ){
            int nDiff = pData->szLeaf - pSeg->iEndofDoclist;
            fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4);
            fts5BufferAppendBlob(&p->rc, &buf, 
                pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff]
            );
          }

          /* Delete the range from page 1 of the segment up to the rewritten
          ** leaf, then store the new version of that leaf. */
          pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
          fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid);
          fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
        }
        fts5DataRelease(pData);
      }
    }
  }
  fts5BufferFree(&buf);
}

/*
** Chunk callback used while merging: pCtx is the Fts5SegWriter, and each
** chunk of position-list data is appended to its output.
*/
static void fts5MergeChunkCallback(
  Fts5Index *p, 
  void *pCtx, 
  const u8 *pChunk, int nChunk
){
  Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx;
  fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk);
}

/*
**
*/
static void fts5IndexMergeLevel(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Structure **ppStruct,       /* IN/OUT: Stucture of index */
  int iLvl,                       /* Level to read input from */
  int *pnRem                      /* Write up to this many output leaves */
){
  Fts5Structure *pStruct = *ppStruct;
  Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl];
  Fts5StructureLevel *pLvlOut;
  Fts5Iter *pIter = 0;       /* Iterator to read input data */
  int
nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ 13041 int nInput; /* Number of input segments */ 13042 Fts5SegWriter writer; /* Writer object */ 13043 Fts5StructureSegment *pSeg; /* Output segment */ 13044 Fts5Buffer term; 13045 int bOldest; /* True if the output segment is the oldest */ 13046 int eDetail = p->pConfig->eDetail; 13047 const int flags = FTS5INDEX_QUERY_NOOUTPUT; 13048 int bTermWritten = 0; /* True if current term already output */ 13049 13050 assert( iLvl<pStruct->nLevel ); 13051 assert( pLvl->nMerge<=pLvl->nSeg ); 13052 13053 memset(&writer, 0, sizeof(Fts5SegWriter)); 13054 memset(&term, 0, sizeof(Fts5Buffer)); 13055 if( pLvl->nMerge ){ 13056 pLvlOut = &pStruct->aLevel[iLvl+1]; 13057 assert( pLvlOut->nSeg>0 ); 13058 nInput = pLvl->nMerge; 13059 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; 13060 13061 fts5WriteInit(p, &writer, pSeg->iSegid); 13062 writer.writer.pgno = pSeg->pgnoLast+1; 13063 writer.iBtPage = 0; 13064 }else{ 13065 int iSegid = fts5AllocateSegid(p, pStruct); 13066 13067 /* Extend the Fts5Structure object as required to ensure the output 13068 ** segment exists. 
*/ 13069 if( iLvl==pStruct->nLevel-1 ){ 13070 fts5StructureAddLevel(&p->rc, ppStruct); 13071 pStruct = *ppStruct; 13072 } 13073 fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); 13074 if( p->rc ) return; 13075 pLvl = &pStruct->aLevel[iLvl]; 13076 pLvlOut = &pStruct->aLevel[iLvl+1]; 13077 13078 fts5WriteInit(p, &writer, iSegid); 13079 13080 /* Add the new segment to the output level */ 13081 pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; 13082 pLvlOut->nSeg++; 13083 pSeg->pgnoFirst = 1; 13084 pSeg->iSegid = iSegid; 13085 pStruct->nSegment++; 13086 13087 /* Read input from all segments in the input level */ 13088 nInput = pLvl->nSeg; 13089 } 13090 bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); 13091 13092 assert( iLvl>=0 ); 13093 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, iLvl, nInput, &pIter); 13094 fts5MultiIterEof(p, pIter)==0; 13095 fts5MultiIterNext(p, pIter, 0, 0) 13096 ){ 13097 Fts5SegIter *pSegIter = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 13098 int nPos; /* position-list size field value */ 13099 int nTerm; 13100 const u8 *pTerm; 13101 13102 pTerm = fts5MultiIterTerm(pIter, &nTerm); 13103 if( nTerm!=term.n || fts5Memcmp(pTerm, term.p, nTerm) ){ 13104 if( pnRem && writer.nLeafWritten>nRem ){ 13105 break; 13106 } 13107 fts5BufferSet(&p->rc, &term, nTerm, pTerm); 13108 bTermWritten =0; 13109 } 13110 13111 /* Check for key annihilation. */ 13112 if( pSegIter->nPos==0 && (bOldest || pSegIter->bDel==0) ) continue; 13113 13114 if( p->rc==SQLITE_OK && bTermWritten==0 ){ 13115 /* This is a new term. Append a term to the output segment. 
*/ 13116 fts5WriteAppendTerm(p, &writer, nTerm, pTerm); 13117 bTermWritten = 1; 13118 } 13119 13120 /* Append the rowid to the output */ 13121 /* WRITEPOSLISTSIZE */ 13122 fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter)); 13123 13124 if( eDetail==FTS5_DETAIL_NONE ){ 13125 if( pSegIter->bDel ){ 13126 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); 13127 if( pSegIter->nPos>0 ){ 13128 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, 0); 13129 } 13130 } 13131 }else{ 13132 /* Append the position-list data to the output */ 13133 nPos = pSegIter->nPos*2 + pSegIter->bDel; 13134 fts5BufferAppendVarint(&p->rc, &writer.writer.buf, nPos); 13135 fts5ChunkIterate(p, pSegIter, (void*)&writer, fts5MergeChunkCallback); 13136 } 13137 } 13138 13139 /* Flush the last leaf page to disk. Set the output segment b-tree height 13140 ** and last leaf page number at the same time. */ 13141 fts5WriteFinish(p, &writer, &pSeg->pgnoLast); 13142 13143 assert( pIter!=0 || p->rc!=SQLITE_OK ); 13144 if( fts5MultiIterEof(p, pIter) ){ 13145 int i; 13146 13147 /* Remove the redundant segments from the %_data table */ 13148 for(i=0; i<nInput; i++){ 13149 fts5DataRemoveSegment(p, pLvl->aSeg[i].iSegid); 13150 } 13151 13152 /* Remove the redundant segments from the input level */ 13153 if( pLvl->nSeg!=nInput ){ 13154 int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); 13155 memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); 13156 } 13157 pStruct->nSegment -= nInput; 13158 pLvl->nSeg -= nInput; 13159 pLvl->nMerge = 0; 13160 if( pSeg->pgnoLast==0 ){ 13161 pLvlOut->nSeg--; 13162 pStruct->nSegment--; 13163 } 13164 }else{ 13165 assert( pSeg->pgnoLast>0 ); 13166 fts5TrimSegments(p, pIter); 13167 pLvl->nMerge = nInput; 13168 } 13169 13170 fts5MultiIterFree(pIter); 13171 fts5BufferFree(&term); 13172 if( pnRem ) *pnRem -= writer.nLeafWritten; 13173 } 13174 13175 /* 13176 ** Do up to nPg pages of automerge work on the index. 
13177 ** 13178 ** Return true if any changes were actually made, or false otherwise. 13179 */ 13180 static int fts5IndexMerge( 13181 Fts5Index *p, /* FTS5 backend object */ 13182 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ 13183 int nPg, /* Pages of work to do */ 13184 int nMin /* Minimum number of segments to merge */ 13185 ){ 13186 int nRem = nPg; 13187 int bRet = 0; 13188 Fts5Structure *pStruct = *ppStruct; 13189 while( nRem>0 && p->rc==SQLITE_OK ){ 13190 int iLvl; /* To iterate through levels */ 13191 int iBestLvl = 0; /* Level offering the most input segments */ 13192 int nBest = 0; /* Number of input segments on best level */ 13193 13194 /* Set iBestLvl to the level to read input segments from. */ 13195 assert( pStruct->nLevel>0 ); 13196 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 13197 Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; 13198 if( pLvl->nMerge ){ 13199 if( pLvl->nMerge>nBest ){ 13200 iBestLvl = iLvl; 13201 nBest = pLvl->nMerge; 13202 } 13203 break; 13204 } 13205 if( pLvl->nSeg>nBest ){ 13206 nBest = pLvl->nSeg; 13207 iBestLvl = iLvl; 13208 } 13209 } 13210 13211 /* If nBest is still 0, then the index must be empty. */ 13212 #ifdef SQLITE_DEBUG 13213 for(iLvl=0; nBest==0 && iLvl<pStruct->nLevel; iLvl++){ 13214 assert( pStruct->aLevel[iLvl].nSeg==0 ); 13215 } 13216 #endif 13217 13218 if( nBest<nMin && pStruct->aLevel[iBestLvl].nMerge==0 ){ 13219 break; 13220 } 13221 bRet = 1; 13222 fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); 13223 if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ 13224 fts5StructurePromote(p, iBestLvl+1, pStruct); 13225 } 13226 } 13227 *ppStruct = pStruct; 13228 return bRet; 13229 } 13230 13231 /* 13232 ** A total of nLeaf leaf pages of data has just been flushed to a level-0 13233 ** segment. This function updates the write-counter accordingly and, if 13234 ** necessary, performs incremental merge work. 13235 ** 13236 ** If an error occurs, set the Fts5Index.rc error code. 
If an error has 13237 ** already occurred, this function is a no-op. 13238 */ 13239 static void fts5IndexAutomerge( 13240 Fts5Index *p, /* FTS5 backend object */ 13241 Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ 13242 int nLeaf /* Number of output leaves just written */ 13243 ){ 13244 if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 && ALWAYS((*ppStruct)!=0) ){ 13245 Fts5Structure *pStruct = *ppStruct; 13246 u64 nWrite; /* Initial value of write-counter */ 13247 int nWork; /* Number of work-quanta to perform */ 13248 int nRem; /* Number of leaf pages left to write */ 13249 13250 /* Update the write-counter. While doing so, set nWork. */ 13251 nWrite = pStruct->nWriteCounter; 13252 nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); 13253 pStruct->nWriteCounter += nLeaf; 13254 nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); 13255 13256 fts5IndexMerge(p, ppStruct, nRem, p->pConfig->nAutomerge); 13257 } 13258 } 13259 13260 static void fts5IndexCrisismerge( 13261 Fts5Index *p, /* FTS5 backend object */ 13262 Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ 13263 ){ 13264 const int nCrisis = p->pConfig->nCrisisMerge; 13265 Fts5Structure *pStruct = *ppStruct; 13266 int iLvl = 0; 13267 13268 assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); 13269 while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ 13270 fts5IndexMergeLevel(p, &pStruct, iLvl, 0); 13271 assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); 13272 fts5StructurePromote(p, iLvl+1, pStruct); 13273 iLvl++; 13274 } 13275 *ppStruct = pStruct; 13276 } 13277 13278 static int fts5IndexReturn(Fts5Index *p){ 13279 int rc = p->rc; 13280 p->rc = SQLITE_OK; 13281 return rc; 13282 } 13283 13284 typedef struct Fts5FlushCtx Fts5FlushCtx; 13285 struct Fts5FlushCtx { 13286 Fts5Index *pIdx; 13287 Fts5SegWriter writer; 13288 }; 13289 13290 /* 13291 ** Buffer aBuf[] contains a list of varints, all small enough to fit 13292 ** in a 32-bit integer. 
Return the size of the largest prefix of this
** list nMax bytes or less in size.
**
** The first varint is always counted, so the value returned may exceed
** nMax if that varint alone is larger than nMax bytes.
*/
static int fts5PoslistPrefix(const u8 *aBuf, int nMax){
  int ret;
  u32 dummy;
  ret = fts5GetVarint32(aBuf, dummy);
  if( ret<nMax ){
    while( 1 ){
      /* Stop before the first varint that would push the prefix past nMax */
      int i = fts5GetVarint32(&aBuf[ret], dummy);
      if( (ret + i) > nMax ) break;
      ret += i;
    }
  }
  return ret;
}

/*
** Flush the contents of the in-memory hash table (p->pHash) to a new level-0
** segment on disk. Also update the corresponding structure record.
**
** Once the segment has been added to the structure, automerge and
** crisis-merge work is performed and the structure is written back.
**
** If an error occurs, set the Fts5Index.rc error code. If an error has
** already occurred, this function is a no-op.
*/
static void fts5FlushOneHash(Fts5Index *p){
  Fts5Hash *pHash = p->pHash;
  Fts5Structure *pStruct;
  int iSegid;
  int pgnoLast = 0;                 /* Last leaf page number in segment */

  /* Obtain a reference to the index structure and allocate a new segment-id
  ** for the new level-0 segment.  */
  pStruct = fts5StructureRead(p);
  iSegid = fts5AllocateSegid(p, pStruct);
  fts5StructureInvalidate(p);

  if( iSegid ){
    const int pgsz = p->pConfig->pgsz;
    int eDetail = p->pConfig->eDetail;
    Fts5StructureSegment *pSeg;   /* New segment within pStruct */
    Fts5Buffer *pBuf;             /* Buffer in which to assemble leaf page */
    Fts5Buffer *pPgidx;           /* Buffer in which to assemble pgidx */

    Fts5SegWriter writer;
    fts5WriteInit(p, &writer, iSegid);

    pBuf = &writer.writer.buf;
    pPgidx = &writer.writer.pgidx;

    /* fts5WriteInit() should have initialized the buffers to (most likely)
    ** the maximum space required. */
    assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) );
    assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) );

    /* Begin scanning through hash table entries. This loop runs once for each
    ** term/doclist currently stored within the hash table. */
    if( p->rc==SQLITE_OK ){
      p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0);
    }
    while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){
      const char *zTerm;          /* Buffer containing term */
      const u8 *pDoclist;         /* Pointer to doclist for this term */
      int nDoclist;               /* Size of doclist in bytes */

      /* Write the term for this entry to disk. */
      sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist);
      fts5WriteAppendTerm(p, &writer, (int)strlen(zTerm), (const u8*)zTerm);
      if( p->rc!=SQLITE_OK ) break;

      assert( writer.bFirstRowidInPage==0 );
      if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){
        /* The entire doclist will fit on the current leaf. */
        fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist);
      }else{
        i64 iRowid = 0;
        u64 iDelta = 0;
        int iOff = 0;

        /* The entire doclist will not fit on this leaf. The following
        ** loop iterates through the poslists that make up the current
        ** doclist.  */
        while( p->rc==SQLITE_OK && iOff<nDoclist ){
          iOff += fts5GetVarint(&pDoclist[iOff], &iDelta);
          iRowid += iDelta;

          /* The first rowid on a page is written in full and its offset is
          ** recorded in the first two bytes of the page. Later rowids are
          ** written as the delta read from the doclist. */
          if( writer.bFirstRowidInPage ){
            fts5PutU16(&pBuf->p[0], (u16)pBuf->n);   /* first rowid on page */
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid);
            writer.bFirstRowidInPage = 0;
            fts5WriteDlidxAppend(p, &writer, iRowid);
            if( p->rc!=SQLITE_OK ) break;
          }else{
            pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta);
          }
          assert( pBuf->n<=pBuf->nSpace );

          if( eDetail==FTS5_DETAIL_NONE ){
            /* detail=none: the rowid may be followed by one or two 0x00
            ** bytes, which are copied through unchanged. */
            if( iOff<nDoclist && pDoclist[iOff]==0 ){
              pBuf->p[pBuf->n++] = 0;
              iOff++;
              if( iOff<nDoclist && pDoclist[iOff]==0 ){
                pBuf->p[pBuf->n++] = 0;
                iOff++;
              }
            }
            if( (pBuf->n + pPgidx->n)>=pgsz ){
              fts5WriteFlushLeaf(p, &writer);
            }
          }else{
            int bDummy;
            int nPos;
            int nCopy = fts5GetPoslistSize(&pDoclist[iOff], &nPos, &bDummy);
            /* nCopy = bytes in the size field plus bytes of position data */
            nCopy += nPos;
            if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){
              /* The entire poslist will fit on the current leaf. So copy
              ** it in one go. */
              fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy);
            }else{
              /* The entire poslist will not fit on this leaf. So it needs
              ** to be broken into sections. The only qualification being
              ** that each varint must be stored contiguously.  */
              const u8 *pPoslist = &pDoclist[iOff];
              int iPos = 0;
              while( p->rc==SQLITE_OK ){
                int nSpace = pgsz - pBuf->n - pPgidx->n;
                int n = 0;
                if( (nCopy - iPos)<=nSpace ){
                  n = nCopy - iPos;
                }else{
                  n = fts5PoslistPrefix(&pPoslist[iPos], nSpace);
                }
                assert( n>0 );
                fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n);
                iPos += n;
                if( (pBuf->n + pPgidx->n)>=pgsz ){
                  fts5WriteFlushLeaf(p, &writer);
                }
                if( iPos>=nCopy ) break;
              }
            }
            iOff += nCopy;
          }
        }
      }

      /* TODO2: Doclist terminator written here. */
      /* pBuf->p[pBuf->n++] = '\0'; */
      assert( pBuf->n<=pBuf->nSpace );
      if( p->rc==SQLITE_OK ) sqlite3Fts5HashScanNext(pHash);
    }
    sqlite3Fts5HashClear(pHash);
    fts5WriteFinish(p, &writer, &pgnoLast);

    /* Update the Fts5Structure. It is written back to the database by the
    ** fts5StructureWrite() call below. */
    if( pStruct->nLevel==0 ){
      fts5StructureAddLevel(&p->rc, &pStruct);
    }
    fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0);
    if( p->rc==SQLITE_OK ){
      pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ];
      pSeg->iSegid = iSegid;
      pSeg->pgnoFirst = 1;
      pSeg->pgnoLast = pgnoLast;
      pStruct->nSegment++;
    }
    fts5StructurePromote(p, 0, pStruct);
  }

  /* These run even when no segment was written (iSegid==0), in which case
  ** pgnoLast is still 0. */
  fts5IndexAutomerge(p, &pStruct, pgnoLast);
  fts5IndexCrisismerge(p, &pStruct);
  fts5StructureWrite(p, pStruct);
  fts5StructureRelease(pStruct);
}

/*
** Flush any data stored in the in-memory hash tables to the database.
13469 */ 13470 static void fts5IndexFlush(Fts5Index *p){ 13471 /* Unless it is empty, flush the hash table to disk */ 13472 if( p->nPendingData ){ 13473 assert( p->pHash ); 13474 p->nPendingData = 0; 13475 fts5FlushOneHash(p); 13476 } 13477 } 13478 13479 static Fts5Structure *fts5IndexOptimizeStruct( 13480 Fts5Index *p, 13481 Fts5Structure *pStruct 13482 ){ 13483 Fts5Structure *pNew = 0; 13484 sqlite3_int64 nByte = sizeof(Fts5Structure); 13485 int nSeg = pStruct->nSegment; 13486 int i; 13487 13488 /* Figure out if this structure requires optimization. A structure does 13489 ** not require optimization if either: 13490 ** 13491 ** + it consists of fewer than two segments, or 13492 ** + all segments are on the same level, or 13493 ** + all segments except one are currently inputs to a merge operation. 13494 ** 13495 ** In the first case, return NULL. In the second, increment the ref-count 13496 ** on *pStruct and return a copy of the pointer to it. 13497 */ 13498 if( nSeg<2 ) return 0; 13499 for(i=0; i<pStruct->nLevel; i++){ 13500 int nThis = pStruct->aLevel[i].nSeg; 13501 if( nThis==nSeg || (nThis==nSeg-1 && pStruct->aLevel[i].nMerge==nThis) ){ 13502 fts5StructureRef(pStruct); 13503 return pStruct; 13504 } 13505 assert( pStruct->aLevel[i].nMerge<=nThis ); 13506 } 13507 13508 nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); 13509 pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); 13510 13511 if( pNew ){ 13512 Fts5StructureLevel *pLvl; 13513 nByte = nSeg * sizeof(Fts5StructureSegment); 13514 pNew->nLevel = pStruct->nLevel+1; 13515 pNew->nRef = 1; 13516 pNew->nWriteCounter = pStruct->nWriteCounter; 13517 pLvl = &pNew->aLevel[pStruct->nLevel]; 13518 pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); 13519 if( pLvl->aSeg ){ 13520 int iLvl, iSeg; 13521 int iSegOut = 0; 13522 /* Iterate through all segments, from oldest to newest. 
Add them to 13523 ** the new Fts5Level object so that pLvl->aSeg[0] is the oldest 13524 ** segment in the data structure. */ 13525 for(iLvl=pStruct->nLevel-1; iLvl>=0; iLvl--){ 13526 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 13527 pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; 13528 iSegOut++; 13529 } 13530 } 13531 pNew->nSegment = pLvl->nSeg = nSeg; 13532 }else{ 13533 sqlite3_free(pNew); 13534 pNew = 0; 13535 } 13536 } 13537 13538 return pNew; 13539 } 13540 13541 static int sqlite3Fts5IndexOptimize(Fts5Index *p){ 13542 Fts5Structure *pStruct; 13543 Fts5Structure *pNew = 0; 13544 13545 assert( p->rc==SQLITE_OK ); 13546 fts5IndexFlush(p); 13547 pStruct = fts5StructureRead(p); 13548 fts5StructureInvalidate(p); 13549 13550 if( pStruct ){ 13551 pNew = fts5IndexOptimizeStruct(p, pStruct); 13552 } 13553 fts5StructureRelease(pStruct); 13554 13555 assert( pNew==0 || pNew->nSegment>0 ); 13556 if( pNew ){ 13557 int iLvl; 13558 for(iLvl=0; pNew->aLevel[iLvl].nSeg==0; iLvl++){} 13559 while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ 13560 int nRem = FTS5_OPT_WORK_UNIT; 13561 fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); 13562 } 13563 13564 fts5StructureWrite(p, pNew); 13565 fts5StructureRelease(pNew); 13566 } 13567 13568 return fts5IndexReturn(p); 13569 } 13570 13571 /* 13572 ** This is called to implement the special "VALUES('merge', $nMerge)" 13573 ** INSERT command. 
13574 */ 13575 static int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ 13576 Fts5Structure *pStruct = fts5StructureRead(p); 13577 if( pStruct ){ 13578 int nMin = p->pConfig->nUsermerge; 13579 fts5StructureInvalidate(p); 13580 if( nMerge<0 ){ 13581 Fts5Structure *pNew = fts5IndexOptimizeStruct(p, pStruct); 13582 fts5StructureRelease(pStruct); 13583 pStruct = pNew; 13584 nMin = 2; 13585 nMerge = nMerge*-1; 13586 } 13587 if( pStruct && pStruct->nLevel ){ 13588 if( fts5IndexMerge(p, &pStruct, nMerge, nMin) ){ 13589 fts5StructureWrite(p, pStruct); 13590 } 13591 } 13592 fts5StructureRelease(pStruct); 13593 } 13594 return fts5IndexReturn(p); 13595 } 13596 13597 static void fts5AppendRowid( 13598 Fts5Index *p, 13599 u64 iDelta, 13600 Fts5Iter *pUnused, 13601 Fts5Buffer *pBuf 13602 ){ 13603 UNUSED_PARAM(pUnused); 13604 fts5BufferAppendVarint(&p->rc, pBuf, iDelta); 13605 } 13606 13607 static void fts5AppendPoslist( 13608 Fts5Index *p, 13609 u64 iDelta, 13610 Fts5Iter *pMulti, 13611 Fts5Buffer *pBuf 13612 ){ 13613 int nData = pMulti->base.nData; 13614 int nByte = nData + 9 + 9 + FTS5_DATA_ZERO_PADDING; 13615 assert( nData>0 ); 13616 if( p->rc==SQLITE_OK && 0==fts5BufferGrow(&p->rc, pBuf, nByte) ){ 13617 fts5BufferSafeAppendVarint(pBuf, iDelta); 13618 fts5BufferSafeAppendVarint(pBuf, nData*2); 13619 fts5BufferSafeAppendBlob(pBuf, pMulti->base.pData, nData); 13620 memset(&pBuf->p[pBuf->n], 0, FTS5_DATA_ZERO_PADDING); 13621 } 13622 } 13623 13624 13625 static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ 13626 u8 *p = pIter->aPoslist + pIter->nSize + pIter->nPoslist; 13627 13628 assert( pIter->aPoslist || (p==0 && pIter->aPoslist==0) ); 13629 if( p>=pIter->aEof ){ 13630 pIter->aPoslist = 0; 13631 }else{ 13632 i64 iDelta; 13633 13634 p += fts5GetVarint(p, (u64*)&iDelta); 13635 pIter->iRowid += iDelta; 13636 13637 /* Read position list size */ 13638 if( p[0] & 0x80 ){ 13639 int nPos; 13640 pIter->nSize = fts5GetVarint32(p, nPos); 13641 pIter->nPoslist = (nPos>>1); 13642 
}else{ 13643 pIter->nPoslist = ((int)(p[0])) >> 1; 13644 pIter->nSize = 1; 13645 } 13646 13647 pIter->aPoslist = p; 13648 if( &pIter->aPoslist[pIter->nPoslist]>pIter->aEof ){ 13649 pIter->aPoslist = 0; 13650 } 13651 } 13652 } 13653 13654 static void fts5DoclistIterInit( 13655 Fts5Buffer *pBuf, 13656 Fts5DoclistIter *pIter 13657 ){ 13658 memset(pIter, 0, sizeof(*pIter)); 13659 if( pBuf->n>0 ){ 13660 pIter->aPoslist = pBuf->p; 13661 pIter->aEof = &pBuf->p[pBuf->n]; 13662 fts5DoclistIterNext(pIter); 13663 } 13664 } 13665 13666 #if 0 13667 /* 13668 ** Append a doclist to buffer pBuf. 13669 ** 13670 ** This function assumes that space within the buffer has already been 13671 ** allocated. 13672 */ 13673 static void fts5MergeAppendDocid( 13674 Fts5Buffer *pBuf, /* Buffer to write to */ 13675 i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ 13676 i64 iRowid /* Rowid to append */ 13677 ){ 13678 assert( pBuf->n!=0 || (*piLastRowid)==0 ); 13679 fts5BufferSafeAppendVarint(pBuf, iRowid - *piLastRowid); 13680 *piLastRowid = iRowid; 13681 } 13682 #endif 13683 13684 #define fts5MergeAppendDocid(pBuf, iLastRowid, iRowid) { \ 13685 assert( (pBuf)->n!=0 || (iLastRowid)==0 ); \ 13686 fts5BufferSafeAppendVarint((pBuf), (u64)(iRowid) - (u64)(iLastRowid)); \ 13687 (iLastRowid) = (iRowid); \ 13688 } 13689 13690 /* 13691 ** Swap the contents of buffer *p1 with that of *p2. 13692 */ 13693 static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ 13694 Fts5Buffer tmp = *p1; 13695 *p1 = *p2; 13696 *p2 = tmp; 13697 } 13698 13699 static void fts5NextRowid(Fts5Buffer *pBuf, int *piOff, i64 *piRowid){ 13700 int i = *piOff; 13701 if( i>=pBuf->n ){ 13702 *piOff = -1; 13703 }else{ 13704 u64 iVal; 13705 *piOff = i + sqlite3Fts5GetVarint(&pBuf->p[i], &iVal); 13706 *piRowid += iVal; 13707 } 13708 } 13709 13710 /* 13711 ** This is the equivalent of fts5MergePrefixLists() for detail=none mode. 13712 ** In this case the buffers consist of a delta-encoded list of rowids only. 
**
** The rowid lists in p1 and aBuf[0] are merged in ascending order into a
** new buffer, which then replaces the contents of p1. A rowid present in
** both inputs is written only once. Exactly one buffer is expected in
** aBuf[] (nBuf==1).
*/
static void fts5MergeRowidLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  int nBuf,                       /* Number of entries in aBuf[] */
  Fts5Buffer *aBuf                /* Array of other lists to merge into p1 */
){
  int i1 = 0;
  int i2 = 0;
  i64 iRowid1 = 0;
  i64 iRowid2 = 0;
  i64 iOut = 0;
  Fts5Buffer *p2 = &aBuf[0];
  Fts5Buffer out;

  (void)nBuf;
  memset(&out, 0, sizeof(out));
  assert( nBuf==1 );
  sqlite3Fts5BufferSize(&p->rc, &out, p1->n + p2->n);
  if( p->rc ) return;

  /* An offset of -1 (set by fts5NextRowid()) marks an exhausted input. */
  fts5NextRowid(p1, &i1, &iRowid1);
  fts5NextRowid(p2, &i2, &iRowid2);
  while( i1>=0 || i2>=0 ){
    if( i1>=0 && (i2<0 || iRowid1<iRowid2) ){
      assert( iOut==0 || iRowid1>iOut );
      fts5BufferSafeAppendVarint(&out, iRowid1 - iOut);
      iOut = iRowid1;
      fts5NextRowid(p1, &i1, &iRowid1);
    }else{
      assert( iOut==0 || iRowid2>iOut );
      fts5BufferSafeAppendVarint(&out, iRowid2 - iOut);
      iOut = iRowid2;
      if( i1>=0 && iRowid1==iRowid2 ){
        /* Same rowid in both lists: skip the copy in p1 */
        fts5NextRowid(p1, &i1, &iRowid1);
      }
      fts5NextRowid(p2, &i2, &iRowid2);
    }
  }

  /* p1 takes ownership of the merged list; its old contents are freed. */
  fts5BufferSwap(&out, p1);
  fts5BufferFree(&out);
}

/*
** One input to fts5MergePrefixLists(). The objects are linked together
** through pNext, ordered either by rowid or by position depending on
** which of the two insert routines below was used.
*/
typedef struct PrefixMerger PrefixMerger;
struct PrefixMerger {
  Fts5DoclistIter iter;           /* Doclist iterator */
  i64 iPos;                       /* For iterating through a position list */
  int iOff;
  u8 *aPos;
  PrefixMerger *pNext;            /* Next in docid/poslist order */
};

/*
** Insert p into the list at *ppHead, which is kept in ascending order of
** iter.iRowid. p is placed in front of the first entry whose rowid is
** greater than or equal to its own. If p's iterator is at EOF
** (iter.aPoslist==0) it is not inserted.
*/
static void fts5PrefixMergerInsertByRowid(
  PrefixMerger **ppHead,
  PrefixMerger *p
){
  if( p->iter.aPoslist ){
    PrefixMerger **pp = ppHead;
    while( *pp && p->iter.iRowid>(*pp)->iter.iRowid ){
      pp = &(*pp)->pNext;
    }
    p->pNext = *pp;
    *pp = p;
  }
}

/*
** Insert p into the list at *ppHead, which is kept in ascending order of
** iPos. p is placed in front of the first entry whose position is greater
** than or equal to its own. If p->iPos is negative it is not inserted.
*/
static void fts5PrefixMergerInsertByPosition(
  PrefixMerger **ppHead,
  PrefixMerger *p
){
  if( p->iPos>=0 ){
    PrefixMerger **pp = ppHead;
    while( *pp && p->iPos>(*pp)->iPos ){
      pp = &(*pp)->pNext;
    }
    p->pNext = *pp;
    *pp = p;
  }
}


/*
** Array aBuf[] contains nBuf doclists. These are all merged in with the
** doclist in buffer p1.
**
** On success the merged doclist replaces the contents of p1. At most
** FTS5_MERGE_NLIST-1 buffers may be passed in aBuf[]. If all of the aBuf[]
** buffers are empty, p1 is left unchanged. If a corrupt position list is
** detected, p->rc is set to FTS5_CORRUPT and p1 is replaced by the output
** assembled up to that point.
*/
static void fts5MergePrefixLists(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5Buffer *p1,                 /* First list to merge */
  int nBuf,                       /* Number of buffers in array aBuf[] */
  Fts5Buffer *aBuf                /* Other lists to merge in */
){
#define fts5PrefixMergerNextPosition(p) \
  sqlite3Fts5PoslistNext64((p)->aPos,(p)->iter.nPoslist,&(p)->iOff,&(p)->iPos)
#define FTS5_MERGE_NLIST 16
  PrefixMerger aMerger[FTS5_MERGE_NLIST];
  PrefixMerger *pHead = 0;
  int i;
  int nOut = 0;
  Fts5Buffer out = {0, 0, 0};
  Fts5Buffer tmp = {0, 0, 0};
  i64 iLastRowid = 0;

  /* Initialize a doclist-iterator for each input buffer. Arrange them in
  ** a linked-list starting at pHead in ascending order of rowid. Avoid
  ** linking any iterators already at EOF into the linked list at all. */
  assert( nBuf+1<=sizeof(aMerger)/sizeof(aMerger[0]) );
  memset(aMerger, 0, sizeof(PrefixMerger)*(nBuf+1));
  pHead = &aMerger[nBuf];
  fts5DoclistIterInit(p1, &pHead->iter);
  for(i=0; i<nBuf; i++){
    fts5DoclistIterInit(&aBuf[i], &aMerger[i].iter);
    fts5PrefixMergerInsertByRowid(&pHead, &aMerger[i]);
    nOut += aBuf[i].n;
  }
  if( nOut==0 ) return;
  nOut += p1->n + 9 + 10*nBuf;

  /* The maximum size of the output is equal to the sum of the
  ** input sizes + 1 varint (9 bytes). The extra varint is because if the
  ** first rowid in one input is a large negative number, and the first in
  ** the other a non-negative number, the delta for the non-negative
  ** number will be larger on disk than the literal integer value
  ** was.
  **
  ** Or, if the input position-lists are corrupt, then the output might
  ** include up to (nBuf+1) extra 10-byte positions created by interpreting -1
  ** (the value PoslistNext64() uses for EOF) as a position and appending
  ** it to the output. This can happen at most once for each input
  ** position-list, hence (nBuf+1) 10 byte paddings.  */
  if( sqlite3Fts5BufferSize(&p->rc, &out, nOut) ) return;

  while( pHead ){
    fts5MergeAppendDocid(&out, iLastRowid, pHead->iter.iRowid);

    if( pHead->pNext && iLastRowid==pHead->pNext->iter.iRowid ){
      /* Merge data from two or more poslists */
      i64 iPrev = 0;
      int nTmp = FTS5_DATA_ZERO_PADDING;
      int nMerge = 0;
      PrefixMerger *pSave = pHead;
      PrefixMerger *pThis = 0;
      int nTail = 0;

      /* Move every merger positioned on rowid iLastRowid from the rowid
      ** list to a new list at pHead, ordered by first position. pSave is
      ** left pointing at the remainder of the rowid list. */
      pHead = 0;
      while( pSave && pSave->iter.iRowid==iLastRowid ){
        PrefixMerger *pNext = pSave->pNext;
        pSave->iOff = 0;
        pSave->iPos = 0;
        pSave->aPos = &pSave->iter.aPoslist[pSave->iter.nSize];
        fts5PrefixMergerNextPosition(pSave);
        nTmp += pSave->iter.nPoslist + 10;
        nMerge++;
        fts5PrefixMergerInsertByPosition(&pHead, pSave);
        pSave = pNext;
      }

      /* Fewer than two non-empty position lists means the input is
      ** corrupt. */
      if( pHead==0 || pHead->pNext==0 ){
        p->rc = FTS5_CORRUPT;
        break;
      }

      /* See the earlier comment in this function for an explanation of why
      ** corrupt input position lists might cause the output to consume
      ** at most nMerge*10 bytes of unexpected space. */
      if( sqlite3Fts5BufferSize(&p->rc, &tmp, nTmp+nMerge*10) ){
        break;
      }
      fts5BufferZero(&tmp);

      pThis = pHead;
      pHead = pThis->pNext;
      sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
      fts5PrefixMergerNextPosition(pThis);
      fts5PrefixMergerInsertByPosition(&pHead, pThis);

      /* Repeatedly emit the smallest position and re-insert its merger,
      ** skipping positions equal to the one just written, until a single
      ** merger remains in the list. */
      while( pHead->pNext ){
        pThis = pHead;
        if( pThis->iPos!=iPrev ){
          sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pThis->iPos);
        }
        fts5PrefixMergerNextPosition(pThis);
        pHead = pThis->pNext;
        fts5PrefixMergerInsertByPosition(&pHead, pThis);
      }

      /* Emit the last merger's current position; the unread remainder of
      ** its position list (nTail bytes) is copied verbatim further down. */
      if( pHead->iPos!=iPrev ){
        sqlite3Fts5PoslistSafeAppend(&tmp, &iPrev, pHead->iPos);
      }
      nTail = pHead->iter.nPoslist - pHead->iOff;

      /* WRITEPOSLISTSIZE */
      assert_nc( tmp.n+nTail<=nTmp );
      assert( tmp.n+nTail<=nTmp+nMerge*10 );
      if( tmp.n+nTail>nTmp-FTS5_DATA_ZERO_PADDING ){
        if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT;
        break;
      }
      fts5BufferSafeAppendVarint(&out, (tmp.n+nTail) * 2);
      fts5BufferSafeAppendBlob(&out, tmp.p, tmp.n);
      if( nTail>0 ){
        fts5BufferSafeAppendBlob(&out, &pHead->aPos[pHead->iOff], nTail);
      }

      /* Restore the rowid list, then advance each merger that was on
      ** iLastRowid and link it back in by rowid. */
      pHead = pSave;
      for(i=0; i<nBuf+1; i++){
        PrefixMerger *pX = &aMerger[i];
        if( pX->iter.aPoslist && pX->iter.iRowid==iLastRowid ){
          fts5DoclistIterNext(&pX->iter);
          fts5PrefixMergerInsertByRowid(&pHead, pX);
        }
      }

    }else{
      /* Copy poslist from pHead to output */
      PrefixMerger *pThis = pHead;
      Fts5DoclistIter *pI = &pThis->iter;
      fts5BufferSafeAppendBlob(&out, pI->aPoslist, pI->nPoslist+pI->nSize);
      fts5DoclistIterNext(pI);
      pHead = pThis->pNext;
      fts5PrefixMergerInsertByRowid(&pHead, pThis);
    }
  }

  /* Replace p1 with the merged output, zero-padded past its end. */
  fts5BufferFree(p1);
  fts5BufferFree(&tmp);
  memset(&out.p[out.n], 0, FTS5_DATA_ZERO_PADDING);
  *p1 = out;
}
static void fts5SetupPrefixIter( 13944 Fts5Index *p, /* Index to read from */ 13945 int bDesc, /* True for "ORDER BY rowid DESC" */ 13946 int iIdx, /* Index to scan for data */ 13947 u8 *pToken, /* Buffer containing prefix to match */ 13948 int nToken, /* Size of buffer pToken in bytes */ 13949 Fts5Colset *pColset, /* Restrict matches to these columns */ 13950 Fts5Iter **ppIter /* OUT: New iterator */ 13951 ){ 13952 Fts5Structure *pStruct; 13953 Fts5Buffer *aBuf; 13954 int nBuf = 32; 13955 int nMerge = 1; 13956 13957 void (*xMerge)(Fts5Index*, Fts5Buffer*, int, Fts5Buffer*); 13958 void (*xAppend)(Fts5Index*, u64, Fts5Iter*, Fts5Buffer*); 13959 if( p->pConfig->eDetail==FTS5_DETAIL_NONE ){ 13960 xMerge = fts5MergeRowidLists; 13961 xAppend = fts5AppendRowid; 13962 }else{ 13963 nMerge = FTS5_MERGE_NLIST-1; 13964 nBuf = nMerge*8; /* Sufficient to merge (16^8)==(2^32) lists */ 13965 xMerge = fts5MergePrefixLists; 13966 xAppend = fts5AppendPoslist; 13967 } 13968 13969 aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); 13970 pStruct = fts5StructureRead(p); 13971 13972 if( aBuf && pStruct ){ 13973 const int flags = FTS5INDEX_QUERY_SCAN 13974 | FTS5INDEX_QUERY_SKIPEMPTY 13975 | FTS5INDEX_QUERY_NOOUTPUT; 13976 int i; 13977 i64 iLastRowid = 0; 13978 Fts5Iter *p1 = 0; /* Iterator used to gather data from index */ 13979 Fts5Data *pData; 13980 Fts5Buffer doclist; 13981 int bNewTerm = 1; 13982 13983 memset(&doclist, 0, sizeof(doclist)); 13984 if( iIdx!=0 ){ 13985 int dummy = 0; 13986 const int f2 = FTS5INDEX_QUERY_SKIPEMPTY|FTS5INDEX_QUERY_NOOUTPUT; 13987 pToken[0] = FTS5_MAIN_PREFIX; 13988 fts5MultiIterNew(p, pStruct, f2, pColset, pToken, nToken, -1, 0, &p1); 13989 fts5IterSetOutputCb(&p->rc, p1); 13990 for(; 13991 fts5MultiIterEof(p, p1)==0; 13992 fts5MultiIterNext2(p, p1, &dummy) 13993 ){ 13994 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; 13995 p1->xSetOutputs(p1, pSeg); 13996 if( p1->base.nData ){ 13997 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, 
&doclist); 13998 iLastRowid = p1->base.iRowid; 13999 } 14000 } 14001 fts5MultiIterFree(p1); 14002 } 14003 14004 pToken[0] = FTS5_MAIN_PREFIX + iIdx; 14005 fts5MultiIterNew(p, pStruct, flags, pColset, pToken, nToken, -1, 0, &p1); 14006 fts5IterSetOutputCb(&p->rc, p1); 14007 for( /* no-op */ ; 14008 fts5MultiIterEof(p, p1)==0; 14009 fts5MultiIterNext2(p, p1, &bNewTerm) 14010 ){ 14011 Fts5SegIter *pSeg = &p1->aSeg[ p1->aFirst[1].iFirst ]; 14012 int nTerm = pSeg->term.n; 14013 const u8 *pTerm = pSeg->term.p; 14014 p1->xSetOutputs(p1, pSeg); 14015 14016 assert_nc( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); 14017 if( bNewTerm ){ 14018 if( nTerm<nToken || memcmp(pToken, pTerm, nToken) ) break; 14019 } 14020 14021 if( p1->base.nData==0 ) continue; 14022 14023 if( p1->base.iRowid<=iLastRowid && doclist.n>0 ){ 14024 for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ 14025 int i1 = i*nMerge; 14026 int iStore; 14027 assert( i1+nMerge<=nBuf ); 14028 for(iStore=i1; iStore<i1+nMerge; iStore++){ 14029 if( aBuf[iStore].n==0 ){ 14030 fts5BufferSwap(&doclist, &aBuf[iStore]); 14031 fts5BufferZero(&doclist); 14032 break; 14033 } 14034 } 14035 if( iStore==i1+nMerge ){ 14036 xMerge(p, &doclist, nMerge, &aBuf[i1]); 14037 for(iStore=i1; iStore<i1+nMerge; iStore++){ 14038 fts5BufferZero(&aBuf[iStore]); 14039 } 14040 } 14041 } 14042 iLastRowid = 0; 14043 } 14044 14045 xAppend(p, (u64)p1->base.iRowid-(u64)iLastRowid, p1, &doclist); 14046 iLastRowid = p1->base.iRowid; 14047 } 14048 14049 assert( (nBuf%nMerge)==0 ); 14050 for(i=0; i<nBuf; i+=nMerge){ 14051 int iFree; 14052 if( p->rc==SQLITE_OK ){ 14053 xMerge(p, &doclist, nMerge, &aBuf[i]); 14054 } 14055 for(iFree=i; iFree<i+nMerge; iFree++){ 14056 fts5BufferFree(&aBuf[iFree]); 14057 } 14058 } 14059 fts5MultiIterFree(p1); 14060 14061 pData = fts5IdxMalloc(p, sizeof(Fts5Data)+doclist.n+FTS5_DATA_ZERO_PADDING); 14062 if( pData ){ 14063 pData->p = (u8*)&pData[1]; 14064 pData->nn = pData->szLeaf = doclist.n; 14065 if( doclist.n ) memcpy(pData->p, 
doclist.p, doclist.n); 14066 fts5MultiIterNew2(p, pData, bDesc, ppIter); 14067 } 14068 fts5BufferFree(&doclist); 14069 } 14070 14071 fts5StructureRelease(pStruct); 14072 sqlite3_free(aBuf); 14073 } 14074 14075 14076 /* 14077 ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain 14078 ** to the document with rowid iRowid. 14079 */ 14080 static int sqlite3Fts5IndexBeginWrite(Fts5Index *p, int bDelete, i64 iRowid){ 14081 assert( p->rc==SQLITE_OK ); 14082 14083 /* Allocate the hash table if it has not already been allocated */ 14084 if( p->pHash==0 ){ 14085 p->rc = sqlite3Fts5HashNew(p->pConfig, &p->pHash, &p->nPendingData); 14086 } 14087 14088 /* Flush the hash table to disk if required */ 14089 if( iRowid<p->iWriteRowid 14090 || (iRowid==p->iWriteRowid && p->bDelete==0) 14091 || (p->nPendingData > p->pConfig->nHashSize) 14092 ){ 14093 fts5IndexFlush(p); 14094 } 14095 14096 p->iWriteRowid = iRowid; 14097 p->bDelete = bDelete; 14098 return fts5IndexReturn(p); 14099 } 14100 14101 /* 14102 ** Commit data to disk. 14103 */ 14104 static int sqlite3Fts5IndexSync(Fts5Index *p){ 14105 assert( p->rc==SQLITE_OK ); 14106 fts5IndexFlush(p); 14107 sqlite3Fts5IndexCloseReader(p); 14108 return fts5IndexReturn(p); 14109 } 14110 14111 /* 14112 ** Discard any data stored in the in-memory hash tables. Do not write it 14113 ** to the database. Additionally, assume that the contents of the %_data 14114 ** table may have changed on disk. So any in-memory caches of %_data 14115 ** records must be invalidated. 14116 */ 14117 static int sqlite3Fts5IndexRollback(Fts5Index *p){ 14118 sqlite3Fts5IndexCloseReader(p); 14119 fts5IndexDiscardData(p); 14120 fts5StructureInvalidate(p); 14121 /* assert( p->rc==SQLITE_OK ); */ 14122 return SQLITE_OK; 14123 } 14124 14125 /* 14126 ** The %_data table is completely empty when this function is called. 
This 14127 ** function populates it with the initial structure objects for each index, 14128 ** and the initial version of the "averages" record (a zero-byte blob). 14129 */ 14130 static int sqlite3Fts5IndexReinit(Fts5Index *p){ 14131 Fts5Structure s; 14132 fts5StructureInvalidate(p); 14133 fts5IndexDiscardData(p); 14134 memset(&s, 0, sizeof(Fts5Structure)); 14135 fts5DataWrite(p, FTS5_AVERAGES_ROWID, (const u8*)"", 0); 14136 fts5StructureWrite(p, &s); 14137 return fts5IndexReturn(p); 14138 } 14139 14140 /* 14141 ** Open a new Fts5Index handle. If the bCreate argument is true, create 14142 ** and initialize the underlying %_data table. 14143 ** 14144 ** If successful, set *pp to point to the new object and return SQLITE_OK. 14145 ** Otherwise, set *pp to NULL and return an SQLite error code. 14146 */ 14147 static int sqlite3Fts5IndexOpen( 14148 Fts5Config *pConfig, 14149 int bCreate, 14150 Fts5Index **pp, 14151 char **pzErr 14152 ){ 14153 int rc = SQLITE_OK; 14154 Fts5Index *p; /* New object */ 14155 14156 *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); 14157 if( rc==SQLITE_OK ){ 14158 p->pConfig = pConfig; 14159 p->nWorkUnit = FTS5_WORK_UNIT; 14160 p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); 14161 if( p->zDataTbl && bCreate ){ 14162 rc = sqlite3Fts5CreateTable( 14163 pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr 14164 ); 14165 if( rc==SQLITE_OK ){ 14166 rc = sqlite3Fts5CreateTable(pConfig, "idx", 14167 "segid, term, pgno, PRIMARY KEY(segid, term)", 14168 1, pzErr 14169 ); 14170 } 14171 if( rc==SQLITE_OK ){ 14172 rc = sqlite3Fts5IndexReinit(p); 14173 } 14174 } 14175 } 14176 14177 assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); 14178 if( rc ){ 14179 sqlite3Fts5IndexClose(p); 14180 *pp = 0; 14181 } 14182 return rc; 14183 } 14184 14185 /* 14186 ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). 
14187 */ 14188 static int sqlite3Fts5IndexClose(Fts5Index *p){ 14189 int rc = SQLITE_OK; 14190 if( p ){ 14191 assert( p->pReader==0 ); 14192 fts5StructureInvalidate(p); 14193 sqlite3_finalize(p->pWriter); 14194 sqlite3_finalize(p->pDeleter); 14195 sqlite3_finalize(p->pIdxWriter); 14196 sqlite3_finalize(p->pIdxDeleter); 14197 sqlite3_finalize(p->pIdxSelect); 14198 sqlite3_finalize(p->pDataVersion); 14199 sqlite3Fts5HashFree(p->pHash); 14200 sqlite3_free(p->zDataTbl); 14201 sqlite3_free(p); 14202 } 14203 return rc; 14204 } 14205 14206 /* 14207 ** Argument p points to a buffer containing utf-8 text that is n bytes in 14208 ** size. Return the number of bytes in the nChar character prefix of the 14209 ** buffer, or 0 if there are less than nChar characters in total. 14210 */ 14211 static int sqlite3Fts5IndexCharlenToBytelen( 14212 const char *p, 14213 int nByte, 14214 int nChar 14215 ){ 14216 int n = 0; 14217 int i; 14218 for(i=0; i<nChar; i++){ 14219 if( n>=nByte ) return 0; /* Input contains fewer than nChar chars */ 14220 if( (unsigned char)p[n++]>=0xc0 ){ 14221 if( n>=nByte ) return 0; 14222 while( (p[n] & 0xc0)==0x80 ){ 14223 n++; 14224 if( n>=nByte ){ 14225 if( i+1==nChar ) break; 14226 return 0; 14227 } 14228 } 14229 } 14230 } 14231 return n; 14232 } 14233 14234 /* 14235 ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of 14236 ** unicode characters in the string. 14237 */ 14238 static int fts5IndexCharlen(const char *pIn, int nIn){ 14239 int nChar = 0; 14240 int i = 0; 14241 while( i<nIn ){ 14242 if( (unsigned char)pIn[i++]>=0xc0 ){ 14243 while( i<nIn && (pIn[i] & 0xc0)==0x80 ) i++; 14244 } 14245 nChar++; 14246 } 14247 return nChar; 14248 } 14249 14250 /* 14251 ** Insert or remove data to or from the index. Each time a document is 14252 ** added to or removed from the index, this function is called one or more 14253 ** times. 14254 ** 14255 ** For an insert, it must be called once for each token in the new document. 
14256 ** If the operation is a delete, it must be called (at least) once for each 14257 ** unique token in the document with an iCol value less than zero. The iPos 14258 ** argument is ignored for a delete. 14259 */ 14260 static int sqlite3Fts5IndexWrite( 14261 Fts5Index *p, /* Index to write to */ 14262 int iCol, /* Column token appears in (-ve -> delete) */ 14263 int iPos, /* Position of token within column */ 14264 const char *pToken, int nToken /* Token to add or remove to or from index */ 14265 ){ 14266 int i; /* Used to iterate through indexes */ 14267 int rc = SQLITE_OK; /* Return code */ 14268 Fts5Config *pConfig = p->pConfig; 14269 14270 assert( p->rc==SQLITE_OK ); 14271 assert( (iCol<0)==p->bDelete ); 14272 14273 /* Add the entry to the main terms index. */ 14274 rc = sqlite3Fts5HashWrite( 14275 p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken 14276 ); 14277 14278 for(i=0; i<pConfig->nPrefix && rc==SQLITE_OK; i++){ 14279 const int nChar = pConfig->aPrefix[i]; 14280 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); 14281 if( nByte ){ 14282 rc = sqlite3Fts5HashWrite(p->pHash, 14283 p->iWriteRowid, iCol, iPos, (char)(FTS5_MAIN_PREFIX+i+1), pToken, 14284 nByte 14285 ); 14286 } 14287 } 14288 14289 return rc; 14290 } 14291 14292 /* 14293 ** Open a new iterator to iterate though all rowid that match the 14294 ** specified token or token prefix. 14295 */ 14296 static int sqlite3Fts5IndexQuery( 14297 Fts5Index *p, /* FTS index to query */ 14298 const char *pToken, int nToken, /* Token (or prefix) to query for */ 14299 int flags, /* Mask of FTS5INDEX_QUERY_X flags */ 14300 Fts5Colset *pColset, /* Match these columns only */ 14301 Fts5IndexIter **ppIter /* OUT: New iterator object */ 14302 ){ 14303 Fts5Config *pConfig = p->pConfig; 14304 Fts5Iter *pRet = 0; 14305 Fts5Buffer buf = {0, 0, 0}; 14306 14307 /* If the QUERY_SCAN flag is set, all other flags must be clear. 
*/ 14308 assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || flags==FTS5INDEX_QUERY_SCAN ); 14309 14310 if( sqlite3Fts5BufferSize(&p->rc, &buf, nToken+1)==0 ){ 14311 int iIdx = 0; /* Index to search */ 14312 int iPrefixIdx = 0; /* +1 prefix index */ 14313 if( nToken>0 ) memcpy(&buf.p[1], pToken, nToken); 14314 14315 /* Figure out which index to search and set iIdx accordingly. If this 14316 ** is a prefix query for which there is no prefix index, set iIdx to 14317 ** greater than pConfig->nPrefix to indicate that the query will be 14318 ** satisfied by scanning multiple terms in the main index. 14319 ** 14320 ** If the QUERY_TEST_NOIDX flag was specified, then this must be a 14321 ** prefix-query. Instead of using a prefix-index (if one exists), 14322 ** evaluate the prefix query using the main FTS index. This is used 14323 ** for internal sanity checking by the integrity-check in debug 14324 ** mode only. */ 14325 #ifdef SQLITE_DEBUG 14326 if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ 14327 assert( flags & FTS5INDEX_QUERY_PREFIX ); 14328 iIdx = 1+pConfig->nPrefix; 14329 }else 14330 #endif 14331 if( flags & FTS5INDEX_QUERY_PREFIX ){ 14332 int nChar = fts5IndexCharlen(pToken, nToken); 14333 for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ 14334 int nIdxChar = pConfig->aPrefix[iIdx-1]; 14335 if( nIdxChar==nChar ) break; 14336 if( nIdxChar==nChar+1 ) iPrefixIdx = iIdx; 14337 } 14338 } 14339 14340 if( iIdx<=pConfig->nPrefix ){ 14341 /* Straight index lookup */ 14342 Fts5Structure *pStruct = fts5StructureRead(p); 14343 buf.p[0] = (u8)(FTS5_MAIN_PREFIX + iIdx); 14344 if( pStruct ){ 14345 fts5MultiIterNew(p, pStruct, flags | FTS5INDEX_QUERY_SKIPEMPTY, 14346 pColset, buf.p, nToken+1, -1, 0, &pRet 14347 ); 14348 fts5StructureRelease(pStruct); 14349 } 14350 }else{ 14351 /* Scan multiple terms in the main index */ 14352 int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; 14353 fts5SetupPrefixIter(p, bDesc, iPrefixIdx, buf.p, nToken+1, pColset,&pRet); 14354 if( 
pRet==0 ){ 14355 assert( p->rc!=SQLITE_OK ); 14356 }else{ 14357 assert( pRet->pColset==0 ); 14358 fts5IterSetOutputCb(&p->rc, pRet); 14359 if( p->rc==SQLITE_OK ){ 14360 Fts5SegIter *pSeg = &pRet->aSeg[pRet->aFirst[1].iFirst]; 14361 if( pSeg->pLeaf ) pRet->xSetOutputs(pRet, pSeg); 14362 } 14363 } 14364 } 14365 14366 if( p->rc ){ 14367 sqlite3Fts5IterClose((Fts5IndexIter*)pRet); 14368 pRet = 0; 14369 sqlite3Fts5IndexCloseReader(p); 14370 } 14371 14372 *ppIter = (Fts5IndexIter*)pRet; 14373 sqlite3Fts5BufferFree(&buf); 14374 } 14375 return fts5IndexReturn(p); 14376 } 14377 14378 /* 14379 ** Return true if the iterator passed as the only argument is at EOF. 14380 */ 14381 /* 14382 ** Move to the next matching rowid. 14383 */ 14384 static int sqlite3Fts5IterNext(Fts5IndexIter *pIndexIter){ 14385 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 14386 assert( pIter->pIndex->rc==SQLITE_OK ); 14387 fts5MultiIterNext(pIter->pIndex, pIter, 0, 0); 14388 return fts5IndexReturn(pIter->pIndex); 14389 } 14390 14391 /* 14392 ** Move to the next matching term/rowid. Used by the fts5vocab module. 14393 */ 14394 static int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ 14395 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 14396 Fts5Index *p = pIter->pIndex; 14397 14398 assert( pIter->pIndex->rc==SQLITE_OK ); 14399 14400 fts5MultiIterNext(p, pIter, 0, 0); 14401 if( p->rc==SQLITE_OK ){ 14402 Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; 14403 if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ 14404 fts5DataRelease(pSeg->pLeaf); 14405 pSeg->pLeaf = 0; 14406 pIter->base.bEof = 1; 14407 } 14408 } 14409 14410 return fts5IndexReturn(pIter->pIndex); 14411 } 14412 14413 /* 14414 ** Move to the next matching rowid that occurs at or after iMatch. The 14415 ** definition of "at or after" depends on whether this iterator iterates 14416 ** in ascending or descending rowid order. 
14417 */ 14418 static int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ 14419 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 14420 fts5MultiIterNextFrom(pIter->pIndex, pIter, iMatch); 14421 return fts5IndexReturn(pIter->pIndex); 14422 } 14423 14424 /* 14425 ** Return the current term. 14426 */ 14427 static const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIndexIter, int *pn){ 14428 int n; 14429 const char *z = (const char*)fts5MultiIterTerm((Fts5Iter*)pIndexIter, &n); 14430 assert_nc( z || n<=1 ); 14431 *pn = n-1; 14432 return (z ? &z[1] : 0); 14433 } 14434 14435 /* 14436 ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). 14437 */ 14438 static void sqlite3Fts5IterClose(Fts5IndexIter *pIndexIter){ 14439 if( pIndexIter ){ 14440 Fts5Iter *pIter = (Fts5Iter*)pIndexIter; 14441 Fts5Index *pIndex = pIter->pIndex; 14442 fts5MultiIterFree(pIter); 14443 sqlite3Fts5IndexCloseReader(pIndex); 14444 } 14445 } 14446 14447 /* 14448 ** Read and decode the "averages" record from the database. 14449 ** 14450 ** Parameter anSize must point to an array of size nCol, where nCol is 14451 ** the number of user defined columns in the FTS table. 14452 */ 14453 static int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ 14454 int nCol = p->pConfig->nCol; 14455 Fts5Data *pData; 14456 14457 *pnRow = 0; 14458 memset(anSize, 0, sizeof(i64) * nCol); 14459 pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); 14460 if( p->rc==SQLITE_OK && pData->nn ){ 14461 int i = 0; 14462 int iCol; 14463 i += fts5GetVarint(&pData->p[i], (u64*)pnRow); 14464 for(iCol=0; i<pData->nn && iCol<nCol; iCol++){ 14465 i += fts5GetVarint(&pData->p[i], (u64*)&anSize[iCol]); 14466 } 14467 } 14468 14469 fts5DataRelease(pData); 14470 return fts5IndexReturn(p); 14471 } 14472 14473 /* 14474 ** Replace the current "averages" record with the contents of the buffer 14475 ** supplied as the second argument. 
14476 */ 14477 static int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ 14478 assert( p->rc==SQLITE_OK ); 14479 fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); 14480 return fts5IndexReturn(p); 14481 } 14482 14483 /* 14484 ** Return the total number of blocks this module has read from the %_data 14485 ** table since it was created. 14486 */ 14487 static int sqlite3Fts5IndexReads(Fts5Index *p){ 14488 return p->nRead; 14489 } 14490 14491 /* 14492 ** Set the 32-bit cookie value stored at the start of all structure 14493 ** records to the value passed as the second argument. 14494 ** 14495 ** Return SQLITE_OK if successful, or an SQLite error code if an error 14496 ** occurs. 14497 */ 14498 static int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ 14499 int rc; /* Return code */ 14500 Fts5Config *pConfig = p->pConfig; /* Configuration object */ 14501 u8 aCookie[4]; /* Binary representation of iNew */ 14502 sqlite3_blob *pBlob = 0; 14503 14504 assert( p->rc==SQLITE_OK ); 14505 sqlite3Fts5Put32(aCookie, iNew); 14506 14507 rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, 14508 "block", FTS5_STRUCTURE_ROWID, 1, &pBlob 14509 ); 14510 if( rc==SQLITE_OK ){ 14511 sqlite3_blob_write(pBlob, aCookie, 4, 0); 14512 rc = sqlite3_blob_close(pBlob); 14513 } 14514 14515 return rc; 14516 } 14517 14518 static int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ 14519 Fts5Structure *pStruct; 14520 pStruct = fts5StructureRead(p); 14521 fts5StructureRelease(pStruct); 14522 return fts5IndexReturn(p); 14523 } 14524 14525 14526 /************************************************************************* 14527 ************************************************************************** 14528 ** Below this point is the implementation of the integrity-check 14529 ** functionality. 14530 */ 14531 14532 /* 14533 ** Return a simple checksum value based on the arguments. 
14534 */ 14535 static u64 sqlite3Fts5IndexEntryCksum( 14536 i64 iRowid, 14537 int iCol, 14538 int iPos, 14539 int iIdx, 14540 const char *pTerm, 14541 int nTerm 14542 ){ 14543 int i; 14544 u64 ret = iRowid; 14545 ret += (ret<<3) + iCol; 14546 ret += (ret<<3) + iPos; 14547 if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); 14548 for(i=0; i<nTerm; i++) ret += (ret<<3) + pTerm[i]; 14549 return ret; 14550 } 14551 14552 #ifdef SQLITE_DEBUG 14553 /* 14554 ** This function is purely an internal test. It does not contribute to 14555 ** FTS functionality, or even the integrity-check, in any way. 14556 ** 14557 ** Instead, it tests that the same set of pgno/rowid combinations are 14558 ** visited regardless of whether the doclist-index identified by parameters 14559 ** iSegid/iLeaf is iterated in forwards or reverse order. 14560 */ 14561 static void fts5TestDlidxReverse( 14562 Fts5Index *p, 14563 int iSegid, /* Segment id to load from */ 14564 int iLeaf /* Load doclist-index for this leaf */ 14565 ){ 14566 Fts5DlidxIter *pDlidx = 0; 14567 u64 cksum1 = 13; 14568 u64 cksum2 = 13; 14569 14570 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); 14571 fts5DlidxIterEof(p, pDlidx)==0; 14572 fts5DlidxIterNext(p, pDlidx) 14573 ){ 14574 i64 iRowid = fts5DlidxIterRowid(pDlidx); 14575 int pgno = fts5DlidxIterPgno(pDlidx); 14576 assert( pgno>iLeaf ); 14577 cksum1 += iRowid + ((i64)pgno<<32); 14578 } 14579 fts5DlidxIterFree(pDlidx); 14580 pDlidx = 0; 14581 14582 for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); 14583 fts5DlidxIterEof(p, pDlidx)==0; 14584 fts5DlidxIterPrev(p, pDlidx) 14585 ){ 14586 i64 iRowid = fts5DlidxIterRowid(pDlidx); 14587 int pgno = fts5DlidxIterPgno(pDlidx); 14588 assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); 14589 cksum2 += iRowid + ((i64)pgno<<32); 14590 } 14591 fts5DlidxIterFree(pDlidx); 14592 pDlidx = 0; 14593 14594 if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; 14595 } 14596 14597 static int fts5QueryCksum( 14598 Fts5Index *p, /* Fts5 index 
object */ 14599 int iIdx, 14600 const char *z, /* Index key to query for */ 14601 int n, /* Size of index key in bytes */ 14602 int flags, /* Flags for Fts5IndexQuery */ 14603 u64 *pCksum /* IN/OUT: Checksum value */ 14604 ){ 14605 int eDetail = p->pConfig->eDetail; 14606 u64 cksum = *pCksum; 14607 Fts5IndexIter *pIter = 0; 14608 int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); 14609 14610 while( rc==SQLITE_OK && ALWAYS(pIter!=0) && 0==sqlite3Fts5IterEof(pIter) ){ 14611 i64 rowid = pIter->iRowid; 14612 14613 if( eDetail==FTS5_DETAIL_NONE ){ 14614 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); 14615 }else{ 14616 Fts5PoslistReader sReader; 14617 for(sqlite3Fts5PoslistReaderInit(pIter->pData, pIter->nData, &sReader); 14618 sReader.bEof==0; 14619 sqlite3Fts5PoslistReaderNext(&sReader) 14620 ){ 14621 int iCol = FTS5_POS2COLUMN(sReader.iPos); 14622 int iOff = FTS5_POS2OFFSET(sReader.iPos); 14623 cksum ^= sqlite3Fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); 14624 } 14625 } 14626 if( rc==SQLITE_OK ){ 14627 rc = sqlite3Fts5IterNext(pIter); 14628 } 14629 } 14630 sqlite3Fts5IterClose(pIter); 14631 14632 *pCksum = cksum; 14633 return rc; 14634 } 14635 14636 /* 14637 ** Check if buffer z[], size n bytes, contains as series of valid utf-8 14638 ** encoded codepoints. If so, return 0. Otherwise, if the buffer does not 14639 ** contain valid utf-8, return non-zero. 
14640 */ 14641 static int fts5TestUtf8(const char *z, int n){ 14642 int i = 0; 14643 assert_nc( n>0 ); 14644 while( i<n ){ 14645 if( (z[i] & 0x80)==0x00 ){ 14646 i++; 14647 }else 14648 if( (z[i] & 0xE0)==0xC0 ){ 14649 if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1; 14650 i += 2; 14651 }else 14652 if( (z[i] & 0xF0)==0xE0 ){ 14653 if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1; 14654 i += 3; 14655 }else 14656 if( (z[i] & 0xF8)==0xF0 ){ 14657 if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1; 14658 if( (z[i+2] & 0xC0)!=0x80 ) return 1; 14659 i += 3; 14660 }else{ 14661 return 1; 14662 } 14663 } 14664 14665 return 0; 14666 } 14667 14668 /* 14669 ** This function is also purely an internal test. It does not contribute to 14670 ** FTS functionality, or even the integrity-check, in any way. 14671 */ 14672 static void fts5TestTerm( 14673 Fts5Index *p, 14674 Fts5Buffer *pPrev, /* Previous term */ 14675 const char *z, int n, /* Possibly new term to test */ 14676 u64 expected, 14677 u64 *pCksum 14678 ){ 14679 int rc = p->rc; 14680 if( pPrev->n==0 ){ 14681 fts5BufferSet(&rc, pPrev, n, (const u8*)z); 14682 }else 14683 if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ 14684 u64 cksum3 = *pCksum; 14685 const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ 14686 int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ 14687 int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); 14688 int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); 14689 u64 ck1 = 0; 14690 u64 ck2 = 0; 14691 14692 /* Check that the results returned for ASC and DESC queries are 14693 ** the same. If not, call this corruption. 
*/ 14694 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); 14695 if( rc==SQLITE_OK ){ 14696 int f = flags|FTS5INDEX_QUERY_DESC; 14697 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); 14698 } 14699 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; 14700 14701 /* If this is a prefix query, check that the results returned if the 14702 ** the index is disabled are the same. In both ASC and DESC order. 14703 ** 14704 ** This check may only be performed if the hash table is empty. This 14705 ** is because the hash table only supports a single scan query at 14706 ** a time, and the multi-iter loop from which this function is called 14707 ** is already performing such a scan. 14708 ** 14709 ** Also only do this if buffer zTerm contains nTerm bytes of valid 14710 ** utf-8. Otherwise, the last part of the buffer contents might contain 14711 ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8 14712 ** character stored in the main fts index, which will cause the 14713 ** test to fail. 
*/ 14714 if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){ 14715 if( iIdx>0 && rc==SQLITE_OK ){ 14716 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; 14717 ck2 = 0; 14718 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); 14719 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; 14720 } 14721 if( iIdx>0 && rc==SQLITE_OK ){ 14722 int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; 14723 ck2 = 0; 14724 rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); 14725 if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; 14726 } 14727 } 14728 14729 cksum3 ^= ck1; 14730 fts5BufferSet(&rc, pPrev, n, (const u8*)z); 14731 14732 if( rc==SQLITE_OK && cksum3!=expected ){ 14733 rc = FTS5_CORRUPT; 14734 } 14735 *pCksum = cksum3; 14736 } 14737 p->rc = rc; 14738 } 14739 14740 #else 14741 # define fts5TestDlidxReverse(x,y,z) 14742 # define fts5TestTerm(u,v,w,x,y,z) 14743 #endif 14744 14745 /* 14746 ** Check that: 14747 ** 14748 ** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and 14749 ** contain zero terms. 14750 ** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and 14751 ** contain zero rowids. 14752 */ 14753 static void fts5IndexIntegrityCheckEmpty( 14754 Fts5Index *p, 14755 Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ 14756 int iFirst, 14757 int iNoRowid, 14758 int iLast 14759 ){ 14760 int i; 14761 14762 /* Now check that the iter.nEmpty leaves following the current leaf 14763 ** (a) exist and (b) contain no terms. 
*/ 14764 for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ 14765 Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)); 14766 if( pLeaf ){ 14767 if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; 14768 if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; 14769 } 14770 fts5DataRelease(pLeaf); 14771 } 14772 } 14773 14774 static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ 14775 int iTermOff = 0; 14776 int ii; 14777 14778 Fts5Buffer buf1 = {0,0,0}; 14779 Fts5Buffer buf2 = {0,0,0}; 14780 14781 ii = pLeaf->szLeaf; 14782 while( ii<pLeaf->nn && p->rc==SQLITE_OK ){ 14783 int res; 14784 int iOff; 14785 int nIncr; 14786 14787 ii += fts5GetVarint32(&pLeaf->p[ii], nIncr); 14788 iTermOff += nIncr; 14789 iOff = iTermOff; 14790 14791 if( iOff>=pLeaf->szLeaf ){ 14792 p->rc = FTS5_CORRUPT; 14793 }else if( iTermOff==nIncr ){ 14794 int nByte; 14795 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); 14796 if( (iOff+nByte)>pLeaf->szLeaf ){ 14797 p->rc = FTS5_CORRUPT; 14798 }else{ 14799 fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); 14800 } 14801 }else{ 14802 int nKeep, nByte; 14803 iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep); 14804 iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); 14805 if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ 14806 p->rc = FTS5_CORRUPT; 14807 }else{ 14808 buf1.n = nKeep; 14809 fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); 14810 } 14811 14812 if( p->rc==SQLITE_OK ){ 14813 res = fts5BufferCompare(&buf1, &buf2); 14814 if( res<=0 ) p->rc = FTS5_CORRUPT; 14815 } 14816 } 14817 fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); 14818 } 14819 14820 fts5BufferFree(&buf1); 14821 fts5BufferFree(&buf2); 14822 } 14823 14824 static void fts5IndexIntegrityCheckSegment( 14825 Fts5Index *p, /* FTS5 backend object */ 14826 Fts5StructureSegment *pSeg /* Segment to check internal consistency */ 14827 ){ 14828 Fts5Config *pConfig = p->pConfig; 14829 sqlite3_stmt *pStmt = 0; 14830 int rc2; 14831 int 
iIdxPrevLeaf = pSeg->pgnoFirst-1; 14832 int iDlidxPrevLeaf = pSeg->pgnoLast; 14833 14834 if( pSeg->pgnoFirst==0 ) return; 14835 14836 fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf( 14837 "SELECT segid, term, (pgno>>1), (pgno&1) FROM %Q.'%q_idx' WHERE segid=%d " 14838 "ORDER BY 1, 2", 14839 pConfig->zDb, pConfig->zName, pSeg->iSegid 14840 )); 14841 14842 /* Iterate through the b-tree hierarchy. */ 14843 while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ 14844 i64 iRow; /* Rowid for this leaf */ 14845 Fts5Data *pLeaf; /* Data for this leaf */ 14846 14847 const char *zIdxTerm = (const char*)sqlite3_column_blob(pStmt, 1); 14848 int nIdxTerm = sqlite3_column_bytes(pStmt, 1); 14849 int iIdxLeaf = sqlite3_column_int(pStmt, 2); 14850 int bIdxDlidx = sqlite3_column_int(pStmt, 3); 14851 14852 /* If the leaf in question has already been trimmed from the segment, 14853 ** ignore this b-tree entry. Otherwise, load it into memory. */ 14854 if( iIdxLeaf<pSeg->pgnoFirst ) continue; 14855 iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); 14856 pLeaf = fts5LeafRead(p, iRow); 14857 if( pLeaf==0 ) break; 14858 14859 /* Check that the leaf contains at least one term, and that it is equal 14860 ** to or larger than the split-key in zIdxTerm. Also check that if there 14861 ** is also a rowid pointer within the leaf page header, it points to a 14862 ** location before the term. 
*/ 14863 if( pLeaf->nn<=pLeaf->szLeaf ){ 14864 p->rc = FTS5_CORRUPT; 14865 }else{ 14866 int iOff; /* Offset of first term on leaf */ 14867 int iRowidOff; /* Offset of first rowid on leaf */ 14868 int nTerm; /* Size of term on leaf in bytes */ 14869 int res; /* Comparison of term and split-key */ 14870 14871 iOff = fts5LeafFirstTermOff(pLeaf); 14872 iRowidOff = fts5LeafFirstRowidOff(pLeaf); 14873 if( iRowidOff>=iOff || iOff>=pLeaf->szLeaf ){ 14874 p->rc = FTS5_CORRUPT; 14875 }else{ 14876 iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); 14877 res = fts5Memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm)); 14878 if( res==0 ) res = nTerm - nIdxTerm; 14879 if( res<0 ) p->rc = FTS5_CORRUPT; 14880 } 14881 14882 fts5IntegrityCheckPgidx(p, pLeaf); 14883 } 14884 fts5DataRelease(pLeaf); 14885 if( p->rc ) break; 14886 14887 /* Now check that the iter.nEmpty leaves following the current leaf 14888 ** (a) exist and (b) contain no terms. */ 14889 fts5IndexIntegrityCheckEmpty( 14890 p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 14891 ); 14892 if( p->rc ) break; 14893 14894 /* If there is a doclist-index, check that it looks right. */ 14895 if( bIdxDlidx ){ 14896 Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ 14897 int iPrevLeaf = iIdxLeaf; 14898 int iSegid = pSeg->iSegid; 14899 int iPg = 0; 14900 i64 iKey; 14901 14902 for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); 14903 fts5DlidxIterEof(p, pDlidx)==0; 14904 fts5DlidxIterNext(p, pDlidx) 14905 ){ 14906 14907 /* Check any rowid-less pages that occur before the current leaf. 
*/ 14908 for(iPg=iPrevLeaf+1; iPg<fts5DlidxIterPgno(pDlidx); iPg++){ 14909 iKey = FTS5_SEGMENT_ROWID(iSegid, iPg); 14910 pLeaf = fts5DataRead(p, iKey); 14911 if( pLeaf ){ 14912 if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT; 14913 fts5DataRelease(pLeaf); 14914 } 14915 } 14916 iPrevLeaf = fts5DlidxIterPgno(pDlidx); 14917 14918 /* Check that the leaf page indicated by the iterator really does 14919 ** contain the rowid suggested by the same. */ 14920 iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf); 14921 pLeaf = fts5DataRead(p, iKey); 14922 if( pLeaf ){ 14923 i64 iRowid; 14924 int iRowidOff = fts5LeafFirstRowidOff(pLeaf); 14925 ASSERT_SZLEAF_OK(pLeaf); 14926 if( iRowidOff>=pLeaf->szLeaf ){ 14927 p->rc = FTS5_CORRUPT; 14928 }else{ 14929 fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); 14930 if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; 14931 } 14932 fts5DataRelease(pLeaf); 14933 } 14934 } 14935 14936 iDlidxPrevLeaf = iPg; 14937 fts5DlidxIterFree(pDlidx); 14938 fts5TestDlidxReverse(p, iSegid, iIdxLeaf); 14939 }else{ 14940 iDlidxPrevLeaf = pSeg->pgnoLast; 14941 /* TODO: Check there is no doclist index */ 14942 } 14943 14944 iIdxPrevLeaf = iIdxLeaf; 14945 } 14946 14947 rc2 = sqlite3_finalize(pStmt); 14948 if( p->rc==SQLITE_OK ) p->rc = rc2; 14949 14950 /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ 14951 #if 0 14952 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ 14953 p->rc = FTS5_CORRUPT; 14954 } 14955 #endif 14956 } 14957 14958 14959 /* 14960 ** Run internal checks to ensure that the FTS index (a) is internally 14961 ** consistent and (b) contains entries for which the XOR of the checksums 14962 ** as calculated by sqlite3Fts5IndexEntryCksum() is cksum. 14963 ** 14964 ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the 14965 ** checksum does not match. Return SQLITE_OK if all checks pass without 14966 ** error, or some other SQLite error code if another error (e.g. OOM) 14967 ** occurs. 
14968 */ 14969 static int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum, int bUseCksum){ 14970 int eDetail = p->pConfig->eDetail; 14971 u64 cksum2 = 0; /* Checksum based on contents of indexes */ 14972 Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ 14973 Fts5Iter *pIter; /* Used to iterate through entire index */ 14974 Fts5Structure *pStruct; /* Index structure */ 14975 int iLvl, iSeg; 14976 14977 #ifdef SQLITE_DEBUG 14978 /* Used by extra internal tests only run if NDEBUG is not defined */ 14979 u64 cksum3 = 0; /* Checksum based on contents of indexes */ 14980 Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ 14981 #endif 14982 const int flags = FTS5INDEX_QUERY_NOOUTPUT; 14983 14984 /* Load the FTS index structure */ 14985 pStruct = fts5StructureRead(p); 14986 if( pStruct==0 ){ 14987 assert( p->rc!=SQLITE_OK ); 14988 return fts5IndexReturn(p); 14989 } 14990 14991 /* Check that the internal nodes of each segment match the leaves */ 14992 for(iLvl=0; iLvl<pStruct->nLevel; iLvl++){ 14993 for(iSeg=0; iSeg<pStruct->aLevel[iLvl].nSeg; iSeg++){ 14994 Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; 14995 fts5IndexIntegrityCheckSegment(p, pSeg); 14996 } 14997 } 14998 14999 /* The cksum argument passed to this function is a checksum calculated 15000 ** based on all expected entries in the FTS index (including prefix index 15001 ** entries). This block checks that a checksum calculated based on the 15002 ** actual contents of FTS index is identical. 15003 ** 15004 ** Two versions of the same checksum are calculated. The first (stack 15005 ** variable cksum2) based on entries extracted from the full-text index 15006 ** while doing a linear scan of each individual index in turn. 15007 ** 15008 ** As each term visited by the linear scans, a separate query for the 15009 ** same term is performed. cksum3 is calculated based on the entries 15010 ** extracted by these queries. 
15011 */ 15012 for(fts5MultiIterNew(p, pStruct, flags, 0, 0, 0, -1, 0, &pIter); 15013 fts5MultiIterEof(p, pIter)==0; 15014 fts5MultiIterNext(p, pIter, 0, 0) 15015 ){ 15016 int n; /* Size of term in bytes */ 15017 i64 iPos = 0; /* Position read from poslist */ 15018 int iOff = 0; /* Offset within poslist */ 15019 i64 iRowid = fts5MultiIterRowid(pIter); 15020 char *z = (char*)fts5MultiIterTerm(pIter, &n); 15021 15022 /* If this is a new term, query for it. Update cksum3 with the results. */ 15023 fts5TestTerm(p, &term, z, n, cksum2, &cksum3); 15024 if( p->rc ) break; 15025 15026 if( eDetail==FTS5_DETAIL_NONE ){ 15027 if( 0==fts5MultiIterIsEmpty(p, pIter) ){ 15028 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, 0, 0, -1, z, n); 15029 } 15030 }else{ 15031 poslist.n = 0; 15032 fts5SegiterPoslist(p, &pIter->aSeg[pIter->aFirst[1].iFirst], 0, &poslist); 15033 fts5BufferAppendBlob(&p->rc, &poslist, 4, (const u8*)"\0\0\0\0"); 15034 while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ 15035 int iCol = FTS5_POS2COLUMN(iPos); 15036 int iTokOff = FTS5_POS2OFFSET(iPos); 15037 cksum2 ^= sqlite3Fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); 15038 } 15039 } 15040 } 15041 fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); 15042 15043 fts5MultiIterFree(pIter); 15044 if( p->rc==SQLITE_OK && bUseCksum && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; 15045 15046 fts5StructureRelease(pStruct); 15047 #ifdef SQLITE_DEBUG 15048 fts5BufferFree(&term); 15049 #endif 15050 fts5BufferFree(&poslist); 15051 return fts5IndexReturn(p); 15052 } 15053 15054 /************************************************************************* 15055 ************************************************************************** 15056 ** Below this point is the implementation of the fts5_decode() scalar 15057 ** function only. 15058 */ 15059 15060 #ifdef SQLITE_TEST 15061 /* 15062 ** Decode a segment-data rowid from the %_data table. This function is 15063 ** the opposite of macro FTS5_SEGMENT_ROWID(). 
15064 */ 15065 static void fts5DecodeRowid( 15066 i64 iRowid, /* Rowid from %_data table */ 15067 int *piSegid, /* OUT: Segment id */ 15068 int *pbDlidx, /* OUT: Dlidx flag */ 15069 int *piHeight, /* OUT: Height */ 15070 int *piPgno /* OUT: Page number */ 15071 ){ 15072 *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); 15073 iRowid >>= FTS5_DATA_PAGE_B; 15074 15075 *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); 15076 iRowid >>= FTS5_DATA_HEIGHT_B; 15077 15078 *pbDlidx = (int)(iRowid & 0x0001); 15079 iRowid >>= FTS5_DATA_DLI_B; 15080 15081 *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); 15082 } 15083 #endif /* SQLITE_TEST */ 15084 15085 #ifdef SQLITE_TEST 15086 static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ 15087 int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ 15088 fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); 15089 15090 if( iSegid==0 ){ 15091 if( iKey==FTS5_AVERAGES_ROWID ){ 15092 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{averages} "); 15093 }else{ 15094 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{structure}"); 15095 } 15096 } 15097 else{ 15098 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "{%ssegid=%d h=%d pgno=%d}", 15099 bDlidx ? 
"dlidx " : "", iSegid, iHeight, iPgno 15100 ); 15101 } 15102 } 15103 #endif /* SQLITE_TEST */ 15104 15105 #ifdef SQLITE_TEST 15106 static void fts5DebugStructure( 15107 int *pRc, /* IN/OUT: error code */ 15108 Fts5Buffer *pBuf, 15109 Fts5Structure *p 15110 ){ 15111 int iLvl, iSeg; /* Iterate through levels, segments */ 15112 15113 for(iLvl=0; iLvl<p->nLevel; iLvl++){ 15114 Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; 15115 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, 15116 " {lvl=%d nMerge=%d nSeg=%d", iLvl, pLvl->nMerge, pLvl->nSeg 15117 ); 15118 for(iSeg=0; iSeg<pLvl->nSeg; iSeg++){ 15119 Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; 15120 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", 15121 pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast 15122 ); 15123 } 15124 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); 15125 } 15126 } 15127 #endif /* SQLITE_TEST */ 15128 15129 #ifdef SQLITE_TEST 15130 /* 15131 ** This is part of the fts5_decode() debugging aid. 15132 ** 15133 ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This 15134 ** function appends a human-readable representation of the same object 15135 ** to the buffer passed as the second argument. 15136 */ 15137 static void fts5DecodeStructure( 15138 int *pRc, /* IN/OUT: error code */ 15139 Fts5Buffer *pBuf, 15140 const u8 *pBlob, int nBlob 15141 ){ 15142 int rc; /* Return code */ 15143 Fts5Structure *p = 0; /* Decoded structure object */ 15144 15145 rc = fts5StructureDecode(pBlob, nBlob, 0, &p); 15146 if( rc!=SQLITE_OK ){ 15147 *pRc = rc; 15148 return; 15149 } 15150 15151 fts5DebugStructure(pRc, pBuf, p); 15152 fts5StructureRelease(p); 15153 } 15154 #endif /* SQLITE_TEST */ 15155 15156 #ifdef SQLITE_TEST 15157 /* 15158 ** This is part of the fts5_decode() debugging aid. 15159 ** 15160 ** Arguments pBlob/nBlob contain an "averages" record. This function 15161 ** appends a human-readable representation of record to the buffer passed 15162 ** as the second argument. 
15163 */ 15164 static void fts5DecodeAverages( 15165 int *pRc, /* IN/OUT: error code */ 15166 Fts5Buffer *pBuf, 15167 const u8 *pBlob, int nBlob 15168 ){ 15169 int i = 0; 15170 const char *zSpace = ""; 15171 15172 while( i<nBlob ){ 15173 u64 iVal; 15174 i += sqlite3Fts5GetVarint(&pBlob[i], &iVal); 15175 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "%s%d", zSpace, (int)iVal); 15176 zSpace = " "; 15177 } 15178 } 15179 #endif /* SQLITE_TEST */ 15180 15181 #ifdef SQLITE_TEST 15182 /* 15183 ** Buffer (a/n) is assumed to contain a list of serialized varints. Read 15184 ** each varint and append its string representation to buffer pBuf. Return 15185 ** after either the input buffer is exhausted or a 0 value is read. 15186 ** 15187 ** The return value is the number of bytes read from the input buffer. 15188 */ 15189 static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ 15190 int iOff = 0; 15191 while( iOff<n ){ 15192 int iVal; 15193 iOff += fts5GetVarint32(&a[iOff], iVal); 15194 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %d", iVal); 15195 } 15196 return iOff; 15197 } 15198 #endif /* SQLITE_TEST */ 15199 15200 #ifdef SQLITE_TEST 15201 /* 15202 ** The start of buffer (a/n) contains the start of a doclist. The doclist 15203 ** may or may not finish within the buffer. This function appends a text 15204 ** representation of the part of the doclist that is present to buffer 15205 ** pBuf. 15206 ** 15207 ** The return value is the number of bytes read from the input buffer. 
15208 */ 15209 static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ 15210 i64 iDocid = 0; 15211 int iOff = 0; 15212 15213 if( n>0 ){ 15214 iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); 15215 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); 15216 } 15217 while( iOff<n ){ 15218 int nPos; 15219 int bDel; 15220 iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDel); 15221 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " nPos=%d%s", nPos, bDel?"*":""); 15222 iOff += fts5DecodePoslist(pRc, pBuf, &a[iOff], MIN(n-iOff, nPos)); 15223 if( iOff<n ){ 15224 i64 iDelta; 15225 iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDelta); 15226 iDocid += iDelta; 15227 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); 15228 } 15229 } 15230 15231 return iOff; 15232 } 15233 #endif /* SQLITE_TEST */ 15234 15235 #ifdef SQLITE_TEST 15236 /* 15237 ** This function is part of the fts5_decode() debugging function. It is 15238 ** only ever used with detail=none tables. 15239 ** 15240 ** Buffer (pData/nData) contains a doclist in the format used by detail=none 15241 ** tables. This function appends a human-readable version of that list to 15242 ** buffer pBuf. 15243 ** 15244 ** If *pRc is other than SQLITE_OK when this function is called, it is a 15245 ** no-op. If an OOM or other error occurs within this function, *pRc is 15246 ** set to an SQLite error code before returning. The final state of buffer 15247 ** pBuf is undefined in this case. 
15248 */ 15249 static void fts5DecodeRowidList( 15250 int *pRc, /* IN/OUT: Error code */ 15251 Fts5Buffer *pBuf, /* Buffer to append text to */ 15252 const u8 *pData, int nData /* Data to decode list-of-rowids from */ 15253 ){ 15254 int i = 0; 15255 i64 iRowid = 0; 15256 15257 while( i<nData ){ 15258 const char *zApp = ""; 15259 u64 iVal; 15260 i += sqlite3Fts5GetVarint(&pData[i], &iVal); 15261 iRowid += iVal; 15262 15263 if( i<nData && pData[i]==0x00 ){ 15264 i++; 15265 if( i<nData && pData[i]==0x00 ){ 15266 i++; 15267 zApp = "+"; 15268 }else{ 15269 zApp = "*"; 15270 } 15271 } 15272 15273 sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " %lld%s", iRowid, zApp); 15274 } 15275 } 15276 #endif /* SQLITE_TEST */ 15277 15278 #ifdef SQLITE_TEST 15279 /* 15280 ** The implementation of user-defined scalar function fts5_decode(). 15281 */ 15282 static void fts5DecodeFunction( 15283 sqlite3_context *pCtx, /* Function call context */ 15284 int nArg, /* Number of args (always 2) */ 15285 sqlite3_value **apVal /* Function arguments */ 15286 ){ 15287 i64 iRowid; /* Rowid for record being decoded */ 15288 int iSegid,iHeight,iPgno,bDlidx;/* Rowid components */ 15289 const u8 *aBlob; int n; /* Record to decode */ 15290 u8 *a = 0; 15291 Fts5Buffer s; /* Build up text to return here */ 15292 int rc = SQLITE_OK; /* Return code */ 15293 sqlite3_int64 nSpace = 0; 15294 int eDetailNone = (sqlite3_user_data(pCtx)!=0); 15295 15296 assert( nArg==2 ); 15297 UNUSED_PARAM(nArg); 15298 memset(&s, 0, sizeof(Fts5Buffer)); 15299 iRowid = sqlite3_value_int64(apVal[0]); 15300 15301 /* Make a copy of the second argument (a blob) in aBlob[]. The aBlob[] 15302 ** copy is followed by FTS5_DATA_ZERO_PADDING 0x00 bytes, which prevents 15303 ** buffer overreads even if the record is corrupt. 
*/ 15304 n = sqlite3_value_bytes(apVal[1]); 15305 aBlob = sqlite3_value_blob(apVal[1]); 15306 nSpace = n + FTS5_DATA_ZERO_PADDING; 15307 a = (u8*)sqlite3Fts5MallocZero(&rc, nSpace); 15308 if( a==0 ) goto decode_out; 15309 if( n>0 ) memcpy(a, aBlob, n); 15310 15311 fts5DecodeRowid(iRowid, &iSegid, &bDlidx, &iHeight, &iPgno); 15312 15313 fts5DebugRowid(&rc, &s, iRowid); 15314 if( bDlidx ){ 15315 Fts5Data dlidx; 15316 Fts5DlidxLvl lvl; 15317 15318 dlidx.p = a; 15319 dlidx.nn = n; 15320 15321 memset(&lvl, 0, sizeof(Fts5DlidxLvl)); 15322 lvl.pData = &dlidx; 15323 lvl.iLeafPgno = iPgno; 15324 15325 for(fts5DlidxLvlNext(&lvl); lvl.bEof==0; fts5DlidxLvlNext(&lvl)){ 15326 sqlite3Fts5BufferAppendPrintf(&rc, &s, 15327 " %d(%lld)", lvl.iLeafPgno, lvl.iRowid 15328 ); 15329 } 15330 }else if( iSegid==0 ){ 15331 if( iRowid==FTS5_AVERAGES_ROWID ){ 15332 fts5DecodeAverages(&rc, &s, a, n); 15333 }else{ 15334 fts5DecodeStructure(&rc, &s, a, n); 15335 } 15336 }else if( eDetailNone ){ 15337 Fts5Buffer term; /* Current term read from page */ 15338 int szLeaf; 15339 int iPgidxOff = szLeaf = fts5GetU16(&a[2]); 15340 int iTermOff; 15341 int nKeep = 0; 15342 int iOff; 15343 15344 memset(&term, 0, sizeof(Fts5Buffer)); 15345 15346 /* Decode any entries that occur before the first term. 
*/ 15347 if( szLeaf<n ){ 15348 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], iTermOff); 15349 }else{ 15350 iTermOff = szLeaf; 15351 } 15352 fts5DecodeRowidList(&rc, &s, &a[4], iTermOff-4); 15353 15354 iOff = iTermOff; 15355 while( iOff<szLeaf ){ 15356 int nAppend; 15357 15358 /* Read the term data for the next term*/ 15359 iOff += fts5GetVarint32(&a[iOff], nAppend); 15360 term.n = nKeep; 15361 fts5BufferAppendBlob(&rc, &term, nAppend, &a[iOff]); 15362 sqlite3Fts5BufferAppendPrintf( 15363 &rc, &s, " term=%.*s", term.n, (const char*)term.p 15364 ); 15365 iOff += nAppend; 15366 15367 /* Figure out where the doclist for this term ends */ 15368 if( iPgidxOff<n ){ 15369 int nIncr; 15370 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nIncr); 15371 iTermOff += nIncr; 15372 }else{ 15373 iTermOff = szLeaf; 15374 } 15375 15376 fts5DecodeRowidList(&rc, &s, &a[iOff], iTermOff-iOff); 15377 iOff = iTermOff; 15378 if( iOff<szLeaf ){ 15379 iOff += fts5GetVarint32(&a[iOff], nKeep); 15380 } 15381 } 15382 15383 fts5BufferFree(&term); 15384 }else{ 15385 Fts5Buffer term; /* Current term read from page */ 15386 int szLeaf; /* Offset of pgidx in a[] */ 15387 int iPgidxOff; 15388 int iPgidxPrev = 0; /* Previous value read from pgidx */ 15389 int iTermOff = 0; 15390 int iRowidOff = 0; 15391 int iOff; 15392 int nDoclist; 15393 15394 memset(&term, 0, sizeof(Fts5Buffer)); 15395 15396 if( n<4 ){ 15397 sqlite3Fts5BufferSet(&rc, &s, 7, (const u8*)"corrupt"); 15398 goto decode_out; 15399 }else{ 15400 iRowidOff = fts5GetU16(&a[0]); 15401 iPgidxOff = szLeaf = fts5GetU16(&a[2]); 15402 if( iPgidxOff<n ){ 15403 fts5GetVarint32(&a[iPgidxOff], iTermOff); 15404 }else if( iPgidxOff>n ){ 15405 rc = FTS5_CORRUPT; 15406 goto decode_out; 15407 } 15408 } 15409 15410 /* Decode the position list tail at the start of the page */ 15411 if( iRowidOff!=0 ){ 15412 iOff = iRowidOff; 15413 }else if( iTermOff!=0 ){ 15414 iOff = iTermOff; 15415 }else{ 15416 iOff = szLeaf; 15417 } 15418 if( iOff>n ){ 15419 rc = FTS5_CORRUPT; 
15420 goto decode_out; 15421 } 15422 fts5DecodePoslist(&rc, &s, &a[4], iOff-4); 15423 15424 /* Decode any more doclist data that appears on the page before the 15425 ** first term. */ 15426 nDoclist = (iTermOff ? iTermOff : szLeaf) - iOff; 15427 if( nDoclist+iOff>n ){ 15428 rc = FTS5_CORRUPT; 15429 goto decode_out; 15430 } 15431 fts5DecodeDoclist(&rc, &s, &a[iOff], nDoclist); 15432 15433 while( iPgidxOff<n && rc==SQLITE_OK ){ 15434 int bFirst = (iPgidxOff==szLeaf); /* True for first term on page */ 15435 int nByte; /* Bytes of data */ 15436 int iEnd; 15437 15438 iPgidxOff += fts5GetVarint32(&a[iPgidxOff], nByte); 15439 iPgidxPrev += nByte; 15440 iOff = iPgidxPrev; 15441 15442 if( iPgidxOff<n ){ 15443 fts5GetVarint32(&a[iPgidxOff], nByte); 15444 iEnd = iPgidxPrev + nByte; 15445 }else{ 15446 iEnd = szLeaf; 15447 } 15448 if( iEnd>szLeaf ){ 15449 rc = FTS5_CORRUPT; 15450 break; 15451 } 15452 15453 if( bFirst==0 ){ 15454 iOff += fts5GetVarint32(&a[iOff], nByte); 15455 if( nByte>term.n ){ 15456 rc = FTS5_CORRUPT; 15457 break; 15458 } 15459 term.n = nByte; 15460 } 15461 iOff += fts5GetVarint32(&a[iOff], nByte); 15462 if( iOff+nByte>n ){ 15463 rc = FTS5_CORRUPT; 15464 break; 15465 } 15466 fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); 15467 iOff += nByte; 15468 15469 sqlite3Fts5BufferAppendPrintf( 15470 &rc, &s, " term=%.*s", term.n, (const char*)term.p 15471 ); 15472 iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], iEnd-iOff); 15473 } 15474 15475 fts5BufferFree(&term); 15476 } 15477 15478 decode_out: 15479 sqlite3_free(a); 15480 if( rc==SQLITE_OK ){ 15481 sqlite3_result_text(pCtx, (const char*)s.p, s.n, SQLITE_TRANSIENT); 15482 }else{ 15483 sqlite3_result_error_code(pCtx, rc); 15484 } 15485 fts5BufferFree(&s); 15486 } 15487 #endif /* SQLITE_TEST */ 15488 15489 #ifdef SQLITE_TEST 15490 /* 15491 ** The implementation of user-defined scalar function fts5_rowid(). 
15492 */ 15493 static void fts5RowidFunction( 15494 sqlite3_context *pCtx, /* Function call context */ 15495 int nArg, /* Number of args (always 2) */ 15496 sqlite3_value **apVal /* Function arguments */ 15497 ){ 15498 const char *zArg; 15499 if( nArg==0 ){ 15500 sqlite3_result_error(pCtx, "should be: fts5_rowid(subject, ....)", -1); 15501 }else{ 15502 zArg = (const char*)sqlite3_value_text(apVal[0]); 15503 if( 0==sqlite3_stricmp(zArg, "segment") ){ 15504 i64 iRowid; 15505 int segid, pgno; 15506 if( nArg!=3 ){ 15507 sqlite3_result_error(pCtx, 15508 "should be: fts5_rowid('segment', segid, pgno))", -1 15509 ); 15510 }else{ 15511 segid = sqlite3_value_int(apVal[1]); 15512 pgno = sqlite3_value_int(apVal[2]); 15513 iRowid = FTS5_SEGMENT_ROWID(segid, pgno); 15514 sqlite3_result_int64(pCtx, iRowid); 15515 } 15516 }else{ 15517 sqlite3_result_error(pCtx, 15518 "first arg to fts5_rowid() must be 'segment'" , -1 15519 ); 15520 } 15521 } 15522 } 15523 #endif /* SQLITE_TEST */ 15524 15525 /* 15526 ** This is called as part of registering the FTS5 module with database 15527 ** connection db. It registers several user-defined scalar functions useful 15528 ** with FTS5. 15529 ** 15530 ** If successful, SQLITE_OK is returned. If an error occurs, some other 15531 ** SQLite error code is returned instead. 
15532 */ 15533 static int sqlite3Fts5IndexInit(sqlite3 *db){ 15534 #ifdef SQLITE_TEST 15535 int rc = sqlite3_create_function( 15536 db, "fts5_decode", 2, SQLITE_UTF8, 0, fts5DecodeFunction, 0, 0 15537 ); 15538 15539 if( rc==SQLITE_OK ){ 15540 rc = sqlite3_create_function( 15541 db, "fts5_decode_none", 2, 15542 SQLITE_UTF8, (void*)db, fts5DecodeFunction, 0, 0 15543 ); 15544 } 15545 15546 if( rc==SQLITE_OK ){ 15547 rc = sqlite3_create_function( 15548 db, "fts5_rowid", -1, SQLITE_UTF8, 0, fts5RowidFunction, 0, 0 15549 ); 15550 } 15551 return rc; 15552 #else 15553 return SQLITE_OK; 15554 UNUSED_PARAM(db); 15555 #endif 15556 } 15557 15558 15559 static int sqlite3Fts5IndexReset(Fts5Index *p){ 15560 assert( p->pStruct==0 || p->iStructVersion!=0 ); 15561 if( fts5IndexDataVersion(p)!=p->iStructVersion ){ 15562 fts5StructureInvalidate(p); 15563 } 15564 return fts5IndexReturn(p); 15565 } 15566 15567 #line 1 "fts5_main.c" 15568 /* 15569 ** 2014 Jun 09 15570 ** 15571 ** The author disclaims copyright to this source code. In place of 15572 ** a legal notice, here is a blessing: 15573 ** 15574 ** May you do good and not evil. 15575 ** May you find forgiveness for yourself and forgive others. 15576 ** May you share freely, never taking more than you give. 15577 ** 15578 ****************************************************************************** 15579 ** 15580 ** This is an SQLite module implementing full-text search. 15581 */ 15582 15583 15584 /* #include "fts5Int.h" */ 15585 15586 /* 15587 ** This variable is set to false when running tests for which the on disk 15588 ** structures should not be corrupt. Otherwise, true. If it is false, extra 15589 ** assert() conditions in the fts5 code are activated - conditions that are 15590 ** only true if it is guaranteed that the fts5 database is not corrupt. 
15591 */ 15592 #ifdef SQLITE_DEBUG 15593 int sqlite3_fts5_may_be_corrupt = 1; 15594 #endif 15595 15596 15597 typedef struct Fts5Auxdata Fts5Auxdata; 15598 typedef struct Fts5Auxiliary Fts5Auxiliary; 15599 typedef struct Fts5Cursor Fts5Cursor; 15600 typedef struct Fts5FullTable Fts5FullTable; 15601 typedef struct Fts5Sorter Fts5Sorter; 15602 typedef struct Fts5TokenizerModule Fts5TokenizerModule; 15603 15604 /* 15605 ** NOTES ON TRANSACTIONS: 15606 ** 15607 ** SQLite invokes the following virtual table methods as transactions are 15608 ** opened and closed by the user: 15609 ** 15610 ** xBegin(): Start of a new transaction. 15611 ** xSync(): Initial part of two-phase commit. 15612 ** xCommit(): Final part of two-phase commit. 15613 ** xRollback(): Rollback the transaction. 15614 ** 15615 ** Anything that is required as part of a commit that may fail is performed 15616 ** in the xSync() callback. Current versions of SQLite ignore any errors 15617 ** returned by xCommit(). 15618 ** 15619 ** And as sub-transactions are opened/closed: 15620 ** 15621 ** xSavepoint(int S): Open savepoint S. 15622 ** xRelease(int S): Commit and close savepoint S. 15623 ** xRollbackTo(int S): Rollback to start of savepoint S. 15624 ** 15625 ** During a write-transaction the fts5_index.c module may cache some data 15626 ** in-memory. It is flushed to disk whenever xSync(), xRelease() or 15627 ** xSavepoint() is called. And discarded whenever xRollback() or xRollbackTo() 15628 ** is called. 15629 ** 15630 ** Additionally, if SQLITE_DEBUG is defined, an instance of the following 15631 ** structure is used to record the current transaction state. This information 15632 ** is not required, but it is used in the assert() statements executed by 15633 ** function fts5CheckTransactionState() (see below). 
15634 */ 15635 struct Fts5TransactionState { 15636 int eState; /* 0==closed, 1==open, 2==synced */ 15637 int iSavepoint; /* Number of open savepoints (0 -> none) */ 15638 }; 15639 15640 /* 15641 ** A single object of this type is allocated when the FTS5 module is 15642 ** registered with a database handle. It is used to store pointers to 15643 ** all registered FTS5 extensions - tokenizers and auxiliary functions. 15644 */ 15645 struct Fts5Global { 15646 fts5_api api; /* User visible part of object (see fts5.h) */ 15647 sqlite3 *db; /* Associated database connection */ 15648 i64 iNextId; /* Used to allocate unique cursor ids */ 15649 Fts5Auxiliary *pAux; /* First in list of all aux. functions */ 15650 Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ 15651 Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ 15652 Fts5Cursor *pCsr; /* First in list of all open cursors */ 15653 }; 15654 15655 /* 15656 ** Each auxiliary function registered with the FTS5 module is represented 15657 ** by an object of the following type. All such objects are stored as part 15658 ** of the Fts5Global.pAux list. 15659 */ 15660 struct Fts5Auxiliary { 15661 Fts5Global *pGlobal; /* Global context for this function */ 15662 char *zFunc; /* Function name (nul-terminated) */ 15663 void *pUserData; /* User-data pointer */ 15664 fts5_extension_function xFunc; /* Callback function */ 15665 void (*xDestroy)(void*); /* Destructor function */ 15666 Fts5Auxiliary *pNext; /* Next registered auxiliary function */ 15667 }; 15668 15669 /* 15670 ** Each tokenizer module registered with the FTS5 module is represented 15671 ** by an object of the following type. All such objects are stored as part 15672 ** of the Fts5Global.pTok list. 
15673 */ 15674 struct Fts5TokenizerModule { 15675 char *zName; /* Name of tokenizer */ 15676 void *pUserData; /* User pointer passed to xCreate() */ 15677 fts5_tokenizer x; /* Tokenizer functions */ 15678 void (*xDestroy)(void*); /* Destructor function */ 15679 Fts5TokenizerModule *pNext; /* Next registered tokenizer module */ 15680 }; 15681 15682 struct Fts5FullTable { 15683 Fts5Table p; /* Public class members from fts5Int.h */ 15684 Fts5Storage *pStorage; /* Document store */ 15685 Fts5Global *pGlobal; /* Global (connection wide) data */ 15686 Fts5Cursor *pSortCsr; /* Sort data from this cursor */ 15687 #ifdef SQLITE_DEBUG 15688 struct Fts5TransactionState ts; 15689 #endif 15690 }; 15691 15692 struct Fts5MatchPhrase { 15693 Fts5Buffer *pPoslist; /* Pointer to current poslist */ 15694 int nTerm; /* Size of phrase in terms */ 15695 }; 15696 15697 /* 15698 ** pStmt: 15699 ** SELECT rowid, <fts> FROM <fts> ORDER BY +rank; 15700 ** 15701 ** aIdx[]: 15702 ** There is one entry in the aIdx[] array for each phrase in the query, 15703 ** the value of which is the offset within aPoslist[] following the last 15704 ** byte of the position list for the corresponding phrase. 15705 */ 15706 struct Fts5Sorter { 15707 sqlite3_stmt *pStmt; 15708 i64 iRowid; /* Current rowid */ 15709 const u8 *aPoslist; /* Position lists for current row */ 15710 int nIdx; /* Number of entries in aIdx[] */ 15711 int aIdx[1]; /* Offsets into aPoslist for current row */ 15712 }; 15713 15714 15715 /* 15716 ** Virtual-table cursor object. 15717 ** 15718 ** iSpecial: 15719 ** If this is a 'special' query (refer to function fts5SpecialMatch()), 15720 ** then this variable contains the result of the query. 15721 ** 15722 ** iFirstRowid, iLastRowid: 15723 ** These variables are only used for FTS5_PLAN_MATCH cursors. Assuming the 15724 ** cursor iterates in ascending order of rowids, iFirstRowid is the lower 15725 ** limit of rowids to return, and iLastRowid the upper. 
In other words, the 15726 ** WHERE clause in the user's query might have been: 15727 ** 15728 ** <tbl> MATCH <expr> AND rowid BETWEEN $iFirstRowid AND $iLastRowid 15729 ** 15730 ** If the cursor iterates in descending order of rowid, iFirstRowid 15731 ** is the upper limit (i.e. the "first" rowid visited) and iLastRowid 15732 ** the lower. 15733 */ 15734 struct Fts5Cursor { 15735 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ 15736 Fts5Cursor *pNext; /* Next cursor in Fts5Cursor.pCsr list */ 15737 int *aColumnSize; /* Values for xColumnSize() */ 15738 i64 iCsrId; /* Cursor id */ 15739 15740 /* Zero from this point onwards on cursor reset */ 15741 int ePlan; /* FTS5_PLAN_XXX value */ 15742 int bDesc; /* True for "ORDER BY rowid DESC" queries */ 15743 i64 iFirstRowid; /* Return no rowids earlier than this */ 15744 i64 iLastRowid; /* Return no rowids later than this */ 15745 sqlite3_stmt *pStmt; /* Statement used to read %_content */ 15746 Fts5Expr *pExpr; /* Expression for MATCH queries */ 15747 Fts5Sorter *pSorter; /* Sorter for "ORDER BY rank" queries */ 15748 int csrflags; /* Mask of cursor flags (see below) */ 15749 i64 iSpecial; /* Result of special query */ 15750 15751 /* "rank" function. Populated on demand from vtab.xColumn(). 
*/ 15752 char *zRank; /* Custom rank function */ 15753 char *zRankArgs; /* Custom rank function args */ 15754 Fts5Auxiliary *pRank; /* Rank callback (or NULL) */ 15755 int nRankArg; /* Number of trailing arguments for rank() */ 15756 sqlite3_value **apRankArg; /* Array of trailing arguments */ 15757 sqlite3_stmt *pRankArgStmt; /* Origin of objects in apRankArg[] */ 15758 15759 /* Auxiliary data storage */ 15760 Fts5Auxiliary *pAux; /* Currently executing extension function */ 15761 Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */ 15762 15763 /* Cache used by auxiliary functions xInst() and xInstCount() */ 15764 Fts5PoslistReader *aInstIter; /* One for each phrase */ 15765 int nInstAlloc; /* Size of aInst[] array (entries / 3) */ 15766 int nInstCount; /* Number of phrase instances */ 15767 int *aInst; /* 3 integers per phrase instance */ 15768 }; 15769 15770 /* 15771 ** Bits that make up the "idxNum" parameter passed indirectly by 15772 ** xBestIndex() to xFilter(). 15773 */ 15774 #define FTS5_BI_MATCH 0x0001 /* <tbl> MATCH ? */ 15775 #define FTS5_BI_RANK 0x0002 /* rank MATCH ? */ 15776 #define FTS5_BI_ROWID_EQ 0x0004 /* rowid == ? */ 15777 #define FTS5_BI_ROWID_LE 0x0008 /* rowid <= ? */ 15778 #define FTS5_BI_ROWID_GE 0x0010 /* rowid >= ? */ 15779 15780 #define FTS5_BI_ORDER_RANK 0x0020 15781 #define FTS5_BI_ORDER_ROWID 0x0040 15782 #define FTS5_BI_ORDER_DESC 0x0080 15783 15784 /* 15785 ** Values for Fts5Cursor.csrflags 15786 */ 15787 #define FTS5CSR_EOF 0x01 15788 #define FTS5CSR_REQUIRE_CONTENT 0x02 15789 #define FTS5CSR_REQUIRE_DOCSIZE 0x04 15790 #define FTS5CSR_REQUIRE_INST 0x08 15791 #define FTS5CSR_FREE_ZRANK 0x10 15792 #define FTS5CSR_REQUIRE_RESEEK 0x20 15793 #define FTS5CSR_REQUIRE_POSLIST 0x40 15794 15795 #define BitFlagAllTest(x,y) (((x) & (y))==(y)) 15796 #define BitFlagTest(x,y) (((x) & (y))!=0) 15797 15798 15799 /* 15800 ** Macros to Set(), Clear() and Test() cursor flags. 
15801 */ 15802 #define CsrFlagSet(pCsr, flag) ((pCsr)->csrflags |= (flag)) 15803 #define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag)) 15804 #define CsrFlagTest(pCsr, flag) ((pCsr)->csrflags & (flag)) 15805 15806 struct Fts5Auxdata { 15807 Fts5Auxiliary *pAux; /* Extension to which this belongs */ 15808 void *pPtr; /* Pointer value */ 15809 void(*xDelete)(void*); /* Destructor */ 15810 Fts5Auxdata *pNext; /* Next object in linked list */ 15811 }; 15812 15813 #ifdef SQLITE_DEBUG 15814 #define FTS5_BEGIN 1 15815 #define FTS5_SYNC 2 15816 #define FTS5_COMMIT 3 15817 #define FTS5_ROLLBACK 4 15818 #define FTS5_SAVEPOINT 5 15819 #define FTS5_RELEASE 6 15820 #define FTS5_ROLLBACKTO 7 15821 static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){ 15822 switch( op ){ 15823 case FTS5_BEGIN: 15824 assert( p->ts.eState==0 ); 15825 p->ts.eState = 1; 15826 p->ts.iSavepoint = -1; 15827 break; 15828 15829 case FTS5_SYNC: 15830 assert( p->ts.eState==1 ); 15831 p->ts.eState = 2; 15832 break; 15833 15834 case FTS5_COMMIT: 15835 assert( p->ts.eState==2 ); 15836 p->ts.eState = 0; 15837 break; 15838 15839 case FTS5_ROLLBACK: 15840 assert( p->ts.eState==1 || p->ts.eState==2 || p->ts.eState==0 ); 15841 p->ts.eState = 0; 15842 break; 15843 15844 case FTS5_SAVEPOINT: 15845 assert( p->ts.eState==1 ); 15846 assert( iSavepoint>=0 ); 15847 assert( iSavepoint>=p->ts.iSavepoint ); 15848 p->ts.iSavepoint = iSavepoint; 15849 break; 15850 15851 case FTS5_RELEASE: 15852 assert( p->ts.eState==1 ); 15853 assert( iSavepoint>=0 ); 15854 assert( iSavepoint<=p->ts.iSavepoint ); 15855 p->ts.iSavepoint = iSavepoint-1; 15856 break; 15857 15858 case FTS5_ROLLBACKTO: 15859 assert( p->ts.eState==1 ); 15860 assert( iSavepoint>=-1 ); 15861 /* The following assert() can fail if another vtab strikes an error 15862 ** within an xSavepoint() call then SQLite calls xRollbackTo() - without 15863 ** having called xSavepoint() on this vtab. 
*/ 15864 /* assert( iSavepoint<=p->ts.iSavepoint ); */ 15865 p->ts.iSavepoint = iSavepoint; 15866 break; 15867 } 15868 } 15869 #else 15870 # define fts5CheckTransactionState(x,y,z) 15871 #endif 15872 15873 /* 15874 ** Return true if pTab is a contentless table. 15875 */ 15876 static int fts5IsContentless(Fts5FullTable *pTab){ 15877 return pTab->p.pConfig->eContent==FTS5_CONTENT_NONE; 15878 } 15879 15880 /* 15881 ** Delete a virtual table handle allocated by fts5InitVtab(). 15882 */ 15883 static void fts5FreeVtab(Fts5FullTable *pTab){ 15884 if( pTab ){ 15885 sqlite3Fts5IndexClose(pTab->p.pIndex); 15886 sqlite3Fts5StorageClose(pTab->pStorage); 15887 sqlite3Fts5ConfigFree(pTab->p.pConfig); 15888 sqlite3_free(pTab); 15889 } 15890 } 15891 15892 /* 15893 ** The xDisconnect() virtual table method. 15894 */ 15895 static int fts5DisconnectMethod(sqlite3_vtab *pVtab){ 15896 fts5FreeVtab((Fts5FullTable*)pVtab); 15897 return SQLITE_OK; 15898 } 15899 15900 /* 15901 ** The xDestroy() virtual table method. 15902 */ 15903 static int fts5DestroyMethod(sqlite3_vtab *pVtab){ 15904 Fts5Table *pTab = (Fts5Table*)pVtab; 15905 int rc = sqlite3Fts5DropAll(pTab->pConfig); 15906 if( rc==SQLITE_OK ){ 15907 fts5FreeVtab((Fts5FullTable*)pVtab); 15908 } 15909 return rc; 15910 } 15911 15912 /* 15913 ** This function is the implementation of both the xConnect and xCreate 15914 ** methods of the FTS3 virtual table. 15915 ** 15916 ** The argv[] array contains the following: 15917 ** 15918 ** argv[0] -> module name ("fts5") 15919 ** argv[1] -> database name 15920 ** argv[2] -> table name 15921 ** argv[...] -> "column name" and other module argument fields. 
15922 */ 15923 static int fts5InitVtab( 15924 int bCreate, /* True for xCreate, false for xConnect */ 15925 sqlite3 *db, /* The SQLite database connection */ 15926 void *pAux, /* Hash table containing tokenizers */ 15927 int argc, /* Number of elements in argv array */ 15928 const char * const *argv, /* xCreate/xConnect argument array */ 15929 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ 15930 char **pzErr /* Write any error message here */ 15931 ){ 15932 Fts5Global *pGlobal = (Fts5Global*)pAux; 15933 const char **azConfig = (const char**)argv; 15934 int rc = SQLITE_OK; /* Return code */ 15935 Fts5Config *pConfig = 0; /* Results of parsing argc/argv */ 15936 Fts5FullTable *pTab = 0; /* New virtual table object */ 15937 15938 /* Allocate the new vtab object and parse the configuration */ 15939 pTab = (Fts5FullTable*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5FullTable)); 15940 if( rc==SQLITE_OK ){ 15941 rc = sqlite3Fts5ConfigParse(pGlobal, db, argc, azConfig, &pConfig, pzErr); 15942 assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); 15943 } 15944 if( rc==SQLITE_OK ){ 15945 pTab->p.pConfig = pConfig; 15946 pTab->pGlobal = pGlobal; 15947 } 15948 15949 /* Open the index sub-system */ 15950 if( rc==SQLITE_OK ){ 15951 rc = sqlite3Fts5IndexOpen(pConfig, bCreate, &pTab->p.pIndex, pzErr); 15952 } 15953 15954 /* Open the storage sub-system */ 15955 if( rc==SQLITE_OK ){ 15956 rc = sqlite3Fts5StorageOpen( 15957 pConfig, pTab->p.pIndex, bCreate, &pTab->pStorage, pzErr 15958 ); 15959 } 15960 15961 /* Call sqlite3_declare_vtab() */ 15962 if( rc==SQLITE_OK ){ 15963 rc = sqlite3Fts5ConfigDeclareVtab(pConfig); 15964 } 15965 15966 /* Load the initial configuration */ 15967 if( rc==SQLITE_OK ){ 15968 assert( pConfig->pzErrmsg==0 ); 15969 pConfig->pzErrmsg = pzErr; 15970 rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); 15971 sqlite3Fts5IndexRollback(pTab->p.pIndex); 15972 pConfig->pzErrmsg = 0; 15973 } 15974 15975 if( rc!=SQLITE_OK ){ 15976 fts5FreeVtab(pTab); 
15977 pTab = 0; 15978 }else if( bCreate ){ 15979 fts5CheckTransactionState(pTab, FTS5_BEGIN, 0); 15980 } 15981 *ppVTab = (sqlite3_vtab*)pTab; 15982 return rc; 15983 } 15984 15985 /* 15986 ** The xConnect() and xCreate() methods for the virtual table. All the 15987 ** work is done in function fts5InitVtab(). 15988 */ 15989 static int fts5ConnectMethod( 15990 sqlite3 *db, /* Database connection */ 15991 void *pAux, /* Pointer to tokenizer hash table */ 15992 int argc, /* Number of elements in argv array */ 15993 const char * const *argv, /* xCreate/xConnect argument array */ 15994 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ 15995 char **pzErr /* OUT: sqlite3_malloc'd error message */ 15996 ){ 15997 return fts5InitVtab(0, db, pAux, argc, argv, ppVtab, pzErr); 15998 } 15999 static int fts5CreateMethod( 16000 sqlite3 *db, /* Database connection */ 16001 void *pAux, /* Pointer to tokenizer hash table */ 16002 int argc, /* Number of elements in argv array */ 16003 const char * const *argv, /* xCreate/xConnect argument array */ 16004 sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ 16005 char **pzErr /* OUT: sqlite3_malloc'd error message */ 16006 ){ 16007 return fts5InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); 16008 } 16009 16010 /* 16011 ** The different query plans. 16012 */ 16013 #define FTS5_PLAN_MATCH 1 /* (<tbl> MATCH ?) */ 16014 #define FTS5_PLAN_SOURCE 2 /* A source cursor for SORTED_MATCH */ 16015 #define FTS5_PLAN_SPECIAL 3 /* An internal query */ 16016 #define FTS5_PLAN_SORTED_MATCH 4 /* (<tbl> MATCH ? ORDER BY rank) */ 16017 #define FTS5_PLAN_SCAN 5 /* No usable constraint */ 16018 #define FTS5_PLAN_ROWID 6 /* (rowid = ?) */ 16019 16020 /* 16021 ** Set the SQLITE_INDEX_SCAN_UNIQUE flag in pIdxInfo->flags. Unless this 16022 ** extension is currently being used by a version of SQLite too old to 16023 ** support index-info flags. In that case this function is a no-op. 
16024 */ 16025 static void fts5SetUniqueFlag(sqlite3_index_info *pIdxInfo){ 16026 #if SQLITE_VERSION_NUMBER>=3008012 16027 #ifndef SQLITE_CORE 16028 if( sqlite3_libversion_number()>=3008012 ) 16029 #endif 16030 { 16031 pIdxInfo->idxFlags |= SQLITE_INDEX_SCAN_UNIQUE; 16032 } 16033 #endif 16034 } 16035 16036 static int fts5UsePatternMatch( 16037 Fts5Config *pConfig, 16038 struct sqlite3_index_constraint *p 16039 ){ 16040 assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB ); 16041 assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE ); 16042 if( pConfig->ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){ 16043 return 1; 16044 } 16045 if( pConfig->ePattern==FTS5_PATTERN_LIKE 16046 && (p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB) 16047 ){ 16048 return 1; 16049 } 16050 return 0; 16051 } 16052 16053 /* 16054 ** Implementation of the xBestIndex method for FTS5 tables. Within the 16055 ** WHERE constraint, it searches for the following: 16056 ** 16057 ** 1. A MATCH constraint against the table column. 16058 ** 2. A MATCH constraint against the "rank" column. 16059 ** 3. A MATCH constraint against some other column. 16060 ** 4. An == constraint against the rowid column. 16061 ** 5. A < or <= constraint against the rowid column. 16062 ** 6. A > or >= constraint against the rowid column. 16063 ** 16064 ** Within the ORDER BY, the following are supported: 16065 ** 16066 ** 5. ORDER BY rank [ASC|DESC] 16067 ** 6. ORDER BY rowid [ASC|DESC] 16068 ** 16069 ** Information for the xFilter call is passed via both the idxNum and 16070 ** idxStr variables. Specifically, idxNum is a bitmask of the following 16071 ** flags used to encode the ORDER BY clause: 16072 ** 16073 ** FTS5_BI_ORDER_RANK 16074 ** FTS5_BI_ORDER_ROWID 16075 ** FTS5_BI_ORDER_DESC 16076 ** 16077 ** idxStr is used to encode data from the WHERE clause. 
For each argument
** passed to the xFilter method, the following is appended to idxStr:
**
**   Match against rank column:             "r"
**   Match against table or other column:   "M<column-number>"
**   LIKE  against other column:            "L<column-number>"
**   GLOB  against other column:            "G<column-number>"
**   Equality constraint against the rowid: "="
**   A < or <= against the rowid:           "<"
**   A > or >= against the rowid:           ">"
**
** This function ensures that there is at most one "r" or "=". And that if
** there exists an "=" then there is no "<" or ">".
**
** Costs are assigned as follows:
**
**  a) If an unusable MATCH operator is present in the WHERE clause, the
**     cost is unconditionally set to 1e50 (a really big number).
**
**  b) Otherwise, if a MATCH operator is present, the cost depends on the
**     other constraints also present. As follows:
**
**       * No other constraints:         cost=1000.0
**       * One rowid range constraint:   cost=750.0
**       * Both rowid range constraints: cost=500.0
**       * An == rowid constraint:       cost=100.0
**
**  c) Otherwise, if there is no MATCH:
**
**       * No other constraints:         cost=1000000.0
**       * One rowid range constraint:   cost=750000.0
**       * Both rowid range constraints: cost=250000.0
**       * An == rowid constraint:       cost=10.0
**
** Costs are not modified by the ORDER BY clause.
16113 */ 16114 static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ 16115 Fts5Table *pTab = (Fts5Table*)pVTab; 16116 Fts5Config *pConfig = pTab->pConfig; 16117 const int nCol = pConfig->nCol; 16118 int idxFlags = 0; /* Parameter passed through to xFilter() */ 16119 int i; 16120 16121 char *idxStr; 16122 int iIdxStr = 0; 16123 int iCons = 0; 16124 16125 int bSeenEq = 0; 16126 int bSeenGt = 0; 16127 int bSeenLt = 0; 16128 int bSeenMatch = 0; 16129 int bSeenRank = 0; 16130 16131 16132 assert( SQLITE_INDEX_CONSTRAINT_EQ<SQLITE_INDEX_CONSTRAINT_MATCH ); 16133 assert( SQLITE_INDEX_CONSTRAINT_GT<SQLITE_INDEX_CONSTRAINT_MATCH ); 16134 assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH ); 16135 assert( SQLITE_INDEX_CONSTRAINT_GE<SQLITE_INDEX_CONSTRAINT_MATCH ); 16136 assert( SQLITE_INDEX_CONSTRAINT_LE<SQLITE_INDEX_CONSTRAINT_MATCH ); 16137 16138 if( pConfig->bLock ){ 16139 pTab->base.zErrMsg = sqlite3_mprintf( 16140 "recursively defined fts5 content table" 16141 ); 16142 return SQLITE_ERROR; 16143 } 16144 16145 idxStr = (char*)sqlite3_malloc(pInfo->nConstraint * 8 + 1); 16146 if( idxStr==0 ) return SQLITE_NOMEM; 16147 pInfo->idxStr = idxStr; 16148 pInfo->needToFreeIdxStr = 1; 16149 16150 for(i=0; i<pInfo->nConstraint; i++){ 16151 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; 16152 int iCol = p->iColumn; 16153 if( p->op==SQLITE_INDEX_CONSTRAINT_MATCH 16154 || (p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol>=nCol) 16155 ){ 16156 /* A MATCH operator or equivalent */ 16157 if( p->usable==0 || iCol<0 ){ 16158 /* As there exists an unusable MATCH constraint this is an 16159 ** unusable plan. Set a prohibitively high cost. 
*/ 16160 pInfo->estimatedCost = 1e50; 16161 assert( iIdxStr < pInfo->nConstraint*6 + 1 ); 16162 idxStr[iIdxStr] = 0; 16163 return SQLITE_OK; 16164 }else{ 16165 if( iCol==nCol+1 ){ 16166 if( bSeenRank ) continue; 16167 idxStr[iIdxStr++] = 'r'; 16168 bSeenRank = 1; 16169 }else if( iCol>=0 ){ 16170 bSeenMatch = 1; 16171 idxStr[iIdxStr++] = 'M'; 16172 sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol); 16173 idxStr += strlen(&idxStr[iIdxStr]); 16174 assert( idxStr[iIdxStr]=='\0' ); 16175 } 16176 pInfo->aConstraintUsage[i].argvIndex = ++iCons; 16177 pInfo->aConstraintUsage[i].omit = 1; 16178 } 16179 }else if( p->usable ){ 16180 if( iCol>=0 && iCol<nCol && fts5UsePatternMatch(pConfig, p) ){ 16181 assert( p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB ); 16182 idxStr[iIdxStr++] = p->op==FTS5_PATTERN_LIKE ? 'L' : 'G'; 16183 sqlite3_snprintf(6, &idxStr[iIdxStr], "%d", iCol); 16184 idxStr += strlen(&idxStr[iIdxStr]); 16185 pInfo->aConstraintUsage[i].argvIndex = ++iCons; 16186 assert( idxStr[iIdxStr]=='\0' ); 16187 }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol<0 ){ 16188 idxStr[iIdxStr++] = '='; 16189 bSeenEq = 1; 16190 pInfo->aConstraintUsage[i].argvIndex = ++iCons; 16191 } 16192 } 16193 } 16194 16195 if( bSeenEq==0 ){ 16196 for(i=0; i<pInfo->nConstraint; i++){ 16197 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; 16198 if( p->iColumn<0 && p->usable ){ 16199 int op = p->op; 16200 if( op==SQLITE_INDEX_CONSTRAINT_LT || op==SQLITE_INDEX_CONSTRAINT_LE ){ 16201 if( bSeenLt ) continue; 16202 idxStr[iIdxStr++] = '<'; 16203 pInfo->aConstraintUsage[i].argvIndex = ++iCons; 16204 bSeenLt = 1; 16205 }else 16206 if( op==SQLITE_INDEX_CONSTRAINT_GT || op==SQLITE_INDEX_CONSTRAINT_GE ){ 16207 if( bSeenGt ) continue; 16208 idxStr[iIdxStr++] = '>'; 16209 pInfo->aConstraintUsage[i].argvIndex = ++iCons; 16210 bSeenGt = 1; 16211 } 16212 } 16213 } 16214 } 16215 idxStr[iIdxStr] = '\0'; 16216 16217 /* Set idxFlags flags for the ORDER BY clause */ 16218 if( 
pInfo->nOrderBy==1 ){ 16219 int iSort = pInfo->aOrderBy[0].iColumn; 16220 if( iSort==(pConfig->nCol+1) && bSeenMatch ){ 16221 idxFlags |= FTS5_BI_ORDER_RANK; 16222 }else if( iSort==-1 ){ 16223 idxFlags |= FTS5_BI_ORDER_ROWID; 16224 } 16225 if( BitFlagTest(idxFlags, FTS5_BI_ORDER_RANK|FTS5_BI_ORDER_ROWID) ){ 16226 pInfo->orderByConsumed = 1; 16227 if( pInfo->aOrderBy[0].desc ){ 16228 idxFlags |= FTS5_BI_ORDER_DESC; 16229 } 16230 } 16231 } 16232 16233 /* Calculate the estimated cost based on the flags set in idxFlags. */ 16234 if( bSeenEq ){ 16235 pInfo->estimatedCost = bSeenMatch ? 100.0 : 10.0; 16236 if( bSeenMatch==0 ) fts5SetUniqueFlag(pInfo); 16237 }else if( bSeenLt && bSeenGt ){ 16238 pInfo->estimatedCost = bSeenMatch ? 500.0 : 250000.0; 16239 }else if( bSeenLt || bSeenGt ){ 16240 pInfo->estimatedCost = bSeenMatch ? 750.0 : 750000.0; 16241 }else{ 16242 pInfo->estimatedCost = bSeenMatch ? 1000.0 : 1000000.0; 16243 } 16244 16245 pInfo->idxNum = idxFlags; 16246 return SQLITE_OK; 16247 } 16248 16249 static int fts5NewTransaction(Fts5FullTable *pTab){ 16250 Fts5Cursor *pCsr; 16251 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ 16252 if( pCsr->base.pVtab==(sqlite3_vtab*)pTab ) return SQLITE_OK; 16253 } 16254 return sqlite3Fts5StorageReset(pTab->pStorage); 16255 } 16256 16257 /* 16258 ** Implementation of xOpen method. 
16259 */ 16260 static int fts5OpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ 16261 Fts5FullTable *pTab = (Fts5FullTable*)pVTab; 16262 Fts5Config *pConfig = pTab->p.pConfig; 16263 Fts5Cursor *pCsr = 0; /* New cursor object */ 16264 sqlite3_int64 nByte; /* Bytes of space to allocate */ 16265 int rc; /* Return code */ 16266 16267 rc = fts5NewTransaction(pTab); 16268 if( rc==SQLITE_OK ){ 16269 nByte = sizeof(Fts5Cursor) + pConfig->nCol * sizeof(int); 16270 pCsr = (Fts5Cursor*)sqlite3_malloc64(nByte); 16271 if( pCsr ){ 16272 Fts5Global *pGlobal = pTab->pGlobal; 16273 memset(pCsr, 0, (size_t)nByte); 16274 pCsr->aColumnSize = (int*)&pCsr[1]; 16275 pCsr->pNext = pGlobal->pCsr; 16276 pGlobal->pCsr = pCsr; 16277 pCsr->iCsrId = ++pGlobal->iNextId; 16278 }else{ 16279 rc = SQLITE_NOMEM; 16280 } 16281 } 16282 *ppCsr = (sqlite3_vtab_cursor*)pCsr; 16283 return rc; 16284 } 16285 16286 static int fts5StmtType(Fts5Cursor *pCsr){ 16287 if( pCsr->ePlan==FTS5_PLAN_SCAN ){ 16288 return (pCsr->bDesc) ? FTS5_STMT_SCAN_DESC : FTS5_STMT_SCAN_ASC; 16289 } 16290 return FTS5_STMT_LOOKUP; 16291 } 16292 16293 /* 16294 ** This function is called after the cursor passed as the only argument 16295 ** is moved to point at a different row. It clears all cached data 16296 ** specific to the previous row stored by the cursor object. 
16297 */ 16298 static void fts5CsrNewrow(Fts5Cursor *pCsr){ 16299 CsrFlagSet(pCsr, 16300 FTS5CSR_REQUIRE_CONTENT 16301 | FTS5CSR_REQUIRE_DOCSIZE 16302 | FTS5CSR_REQUIRE_INST 16303 | FTS5CSR_REQUIRE_POSLIST 16304 ); 16305 } 16306 16307 static void fts5FreeCursorComponents(Fts5Cursor *pCsr){ 16308 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 16309 Fts5Auxdata *pData; 16310 Fts5Auxdata *pNext; 16311 16312 sqlite3_free(pCsr->aInstIter); 16313 sqlite3_free(pCsr->aInst); 16314 if( pCsr->pStmt ){ 16315 int eStmt = fts5StmtType(pCsr); 16316 sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); 16317 } 16318 if( pCsr->pSorter ){ 16319 Fts5Sorter *pSorter = pCsr->pSorter; 16320 sqlite3_finalize(pSorter->pStmt); 16321 sqlite3_free(pSorter); 16322 } 16323 16324 if( pCsr->ePlan!=FTS5_PLAN_SOURCE ){ 16325 sqlite3Fts5ExprFree(pCsr->pExpr); 16326 } 16327 16328 for(pData=pCsr->pAuxdata; pData; pData=pNext){ 16329 pNext = pData->pNext; 16330 if( pData->xDelete ) pData->xDelete(pData->pPtr); 16331 sqlite3_free(pData); 16332 } 16333 16334 sqlite3_finalize(pCsr->pRankArgStmt); 16335 sqlite3_free(pCsr->apRankArg); 16336 16337 if( CsrFlagTest(pCsr, FTS5CSR_FREE_ZRANK) ){ 16338 sqlite3_free(pCsr->zRank); 16339 sqlite3_free(pCsr->zRankArgs); 16340 } 16341 16342 sqlite3Fts5IndexCloseReader(pTab->p.pIndex); 16343 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan - (u8*)pCsr)); 16344 } 16345 16346 16347 /* 16348 ** Close the cursor. For additional information see the documentation 16349 ** on the xClose method of the virtual table interface. 
16350 */ 16351 static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ 16352 if( pCursor ){ 16353 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); 16354 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; 16355 Fts5Cursor **pp; 16356 16357 fts5FreeCursorComponents(pCsr); 16358 /* Remove the cursor from the Fts5Global.pCsr list */ 16359 for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); 16360 *pp = pCsr->pNext; 16361 16362 sqlite3_free(pCsr); 16363 } 16364 return SQLITE_OK; 16365 } 16366 16367 static int fts5SorterNext(Fts5Cursor *pCsr){ 16368 Fts5Sorter *pSorter = pCsr->pSorter; 16369 int rc; 16370 16371 rc = sqlite3_step(pSorter->pStmt); 16372 if( rc==SQLITE_DONE ){ 16373 rc = SQLITE_OK; 16374 CsrFlagSet(pCsr, FTS5CSR_EOF|FTS5CSR_REQUIRE_CONTENT); 16375 }else if( rc==SQLITE_ROW ){ 16376 const u8 *a; 16377 const u8 *aBlob; 16378 int nBlob; 16379 int i; 16380 int iOff = 0; 16381 rc = SQLITE_OK; 16382 16383 pSorter->iRowid = sqlite3_column_int64(pSorter->pStmt, 0); 16384 nBlob = sqlite3_column_bytes(pSorter->pStmt, 1); 16385 aBlob = a = sqlite3_column_blob(pSorter->pStmt, 1); 16386 16387 /* nBlob==0 in detail=none mode. */ 16388 if( nBlob>0 ){ 16389 for(i=0; i<(pSorter->nIdx-1); i++){ 16390 int iVal; 16391 a += fts5GetVarint32(a, iVal); 16392 iOff += iVal; 16393 pSorter->aIdx[i] = iOff; 16394 } 16395 pSorter->aIdx[i] = &aBlob[nBlob] - a; 16396 pSorter->aPoslist = a; 16397 } 16398 16399 fts5CsrNewrow(pCsr); 16400 } 16401 16402 return rc; 16403 } 16404 16405 16406 /* 16407 ** Set the FTS5CSR_REQUIRE_RESEEK flag on all FTS5_PLAN_MATCH cursors 16408 ** open on table pTab. 
16409 */ 16410 static void fts5TripCursors(Fts5FullTable *pTab){ 16411 Fts5Cursor *pCsr; 16412 for(pCsr=pTab->pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ 16413 if( pCsr->ePlan==FTS5_PLAN_MATCH 16414 && pCsr->base.pVtab==(sqlite3_vtab*)pTab 16415 ){ 16416 CsrFlagSet(pCsr, FTS5CSR_REQUIRE_RESEEK); 16417 } 16418 } 16419 } 16420 16421 /* 16422 ** If the REQUIRE_RESEEK flag is set on the cursor passed as the first 16423 ** argument, close and reopen all Fts5IndexIter iterators that the cursor 16424 ** is using. Then attempt to move the cursor to a rowid equal to or laster 16425 ** (in the cursors sort order - ASC or DESC) than the current rowid. 16426 ** 16427 ** If the new rowid is not equal to the old, set output parameter *pbSkip 16428 ** to 1 before returning. Otherwise, leave it unchanged. 16429 ** 16430 ** Return SQLITE_OK if successful or if no reseek was required, or an 16431 ** error code if an error occurred. 16432 */ 16433 static int fts5CursorReseek(Fts5Cursor *pCsr, int *pbSkip){ 16434 int rc = SQLITE_OK; 16435 assert( *pbSkip==0 ); 16436 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_RESEEK) ){ 16437 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 16438 int bDesc = pCsr->bDesc; 16439 i64 iRowid = sqlite3Fts5ExprRowid(pCsr->pExpr); 16440 16441 rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->p.pIndex, iRowid, bDesc); 16442 if( rc==SQLITE_OK && iRowid!=sqlite3Fts5ExprRowid(pCsr->pExpr) ){ 16443 *pbSkip = 1; 16444 } 16445 16446 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_RESEEK); 16447 fts5CsrNewrow(pCsr); 16448 if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ 16449 CsrFlagSet(pCsr, FTS5CSR_EOF); 16450 *pbSkip = 1; 16451 } 16452 } 16453 return rc; 16454 } 16455 16456 16457 /* 16458 ** Advance the cursor to the next row in the table that matches the 16459 ** search criteria. 16460 ** 16461 ** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned 16462 ** even if we reach end-of-file. 
The fts5EofMethod() will be called 16463 ** subsequently to determine whether or not an EOF was hit. 16464 */ 16465 static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ 16466 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; 16467 int rc; 16468 16469 assert( (pCsr->ePlan<3)== 16470 (pCsr->ePlan==FTS5_PLAN_MATCH || pCsr->ePlan==FTS5_PLAN_SOURCE) 16471 ); 16472 assert( !CsrFlagTest(pCsr, FTS5CSR_EOF) ); 16473 16474 if( pCsr->ePlan<3 ){ 16475 int bSkip = 0; 16476 if( (rc = fts5CursorReseek(pCsr, &bSkip)) || bSkip ) return rc; 16477 rc = sqlite3Fts5ExprNext(pCsr->pExpr, pCsr->iLastRowid); 16478 CsrFlagSet(pCsr, sqlite3Fts5ExprEof(pCsr->pExpr)); 16479 fts5CsrNewrow(pCsr); 16480 }else{ 16481 switch( pCsr->ePlan ){ 16482 case FTS5_PLAN_SPECIAL: { 16483 CsrFlagSet(pCsr, FTS5CSR_EOF); 16484 rc = SQLITE_OK; 16485 break; 16486 } 16487 16488 case FTS5_PLAN_SORTED_MATCH: { 16489 rc = fts5SorterNext(pCsr); 16490 break; 16491 } 16492 16493 default: { 16494 Fts5Config *pConfig = ((Fts5Table*)pCursor->pVtab)->pConfig; 16495 pConfig->bLock++; 16496 rc = sqlite3_step(pCsr->pStmt); 16497 pConfig->bLock--; 16498 if( rc!=SQLITE_ROW ){ 16499 CsrFlagSet(pCsr, FTS5CSR_EOF); 16500 rc = sqlite3_reset(pCsr->pStmt); 16501 if( rc!=SQLITE_OK ){ 16502 pCursor->pVtab->zErrMsg = sqlite3_mprintf( 16503 "%s", sqlite3_errmsg(pConfig->db) 16504 ); 16505 } 16506 }else{ 16507 rc = SQLITE_OK; 16508 } 16509 break; 16510 } 16511 } 16512 } 16513 16514 return rc; 16515 } 16516 16517 16518 static int fts5PrepareStatement( 16519 sqlite3_stmt **ppStmt, 16520 Fts5Config *pConfig, 16521 const char *zFmt, 16522 ... 
16523 ){ 16524 sqlite3_stmt *pRet = 0; 16525 int rc; 16526 char *zSql; 16527 va_list ap; 16528 16529 va_start(ap, zFmt); 16530 zSql = sqlite3_vmprintf(zFmt, ap); 16531 if( zSql==0 ){ 16532 rc = SQLITE_NOMEM; 16533 }else{ 16534 rc = sqlite3_prepare_v3(pConfig->db, zSql, -1, 16535 SQLITE_PREPARE_PERSISTENT, &pRet, 0); 16536 if( rc!=SQLITE_OK ){ 16537 *pConfig->pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(pConfig->db)); 16538 } 16539 sqlite3_free(zSql); 16540 } 16541 16542 va_end(ap); 16543 *ppStmt = pRet; 16544 return rc; 16545 } 16546 16547 static int fts5CursorFirstSorted( 16548 Fts5FullTable *pTab, 16549 Fts5Cursor *pCsr, 16550 int bDesc 16551 ){ 16552 Fts5Config *pConfig = pTab->p.pConfig; 16553 Fts5Sorter *pSorter; 16554 int nPhrase; 16555 sqlite3_int64 nByte; 16556 int rc; 16557 const char *zRank = pCsr->zRank; 16558 const char *zRankArgs = pCsr->zRankArgs; 16559 16560 nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); 16561 nByte = sizeof(Fts5Sorter) + sizeof(int) * (nPhrase-1); 16562 pSorter = (Fts5Sorter*)sqlite3_malloc64(nByte); 16563 if( pSorter==0 ) return SQLITE_NOMEM; 16564 memset(pSorter, 0, (size_t)nByte); 16565 pSorter->nIdx = nPhrase; 16566 16567 /* TODO: It would be better to have some system for reusing statement 16568 ** handles here, rather than preparing a new one for each query. But that 16569 ** is not possible as SQLite reference counts the virtual table objects. 16570 ** And since the statement required here reads from this very virtual 16571 ** table, saving it creates a circular reference. 16572 ** 16573 ** If SQLite a built-in statement cache, this wouldn't be a problem. */ 16574 rc = fts5PrepareStatement(&pSorter->pStmt, pConfig, 16575 "SELECT rowid, rank FROM %Q.%Q ORDER BY %s(\"%w\"%s%s) %s", 16576 pConfig->zDb, pConfig->zName, zRank, pConfig->zName, 16577 (zRankArgs ? ", " : ""), 16578 (zRankArgs ? zRankArgs : ""), 16579 bDesc ? 
"DESC" : "ASC" 16580 ); 16581 16582 pCsr->pSorter = pSorter; 16583 if( rc==SQLITE_OK ){ 16584 assert( pTab->pSortCsr==0 ); 16585 pTab->pSortCsr = pCsr; 16586 rc = fts5SorterNext(pCsr); 16587 pTab->pSortCsr = 0; 16588 } 16589 16590 if( rc!=SQLITE_OK ){ 16591 sqlite3_finalize(pSorter->pStmt); 16592 sqlite3_free(pSorter); 16593 pCsr->pSorter = 0; 16594 } 16595 16596 return rc; 16597 } 16598 16599 static int fts5CursorFirst(Fts5FullTable *pTab, Fts5Cursor *pCsr, int bDesc){ 16600 int rc; 16601 Fts5Expr *pExpr = pCsr->pExpr; 16602 rc = sqlite3Fts5ExprFirst(pExpr, pTab->p.pIndex, pCsr->iFirstRowid, bDesc); 16603 if( sqlite3Fts5ExprEof(pExpr) ){ 16604 CsrFlagSet(pCsr, FTS5CSR_EOF); 16605 } 16606 fts5CsrNewrow(pCsr); 16607 return rc; 16608 } 16609 16610 /* 16611 ** Process a "special" query. A special query is identified as one with a 16612 ** MATCH expression that begins with a '*' character. The remainder of 16613 ** the text passed to the MATCH operator are used as the special query 16614 ** parameters. 16615 */ 16616 static int fts5SpecialMatch( 16617 Fts5FullTable *pTab, 16618 Fts5Cursor *pCsr, 16619 const char *zQuery 16620 ){ 16621 int rc = SQLITE_OK; /* Return code */ 16622 const char *z = zQuery; /* Special query text */ 16623 int n; /* Number of bytes in text at z */ 16624 16625 while( z[0]==' ' ) z++; 16626 for(n=0; z[n] && z[n]!=' '; n++); 16627 16628 assert( pTab->p.base.zErrMsg==0 ); 16629 pCsr->ePlan = FTS5_PLAN_SPECIAL; 16630 16631 if( n==5 && 0==sqlite3_strnicmp("reads", z, n) ){ 16632 pCsr->iSpecial = sqlite3Fts5IndexReads(pTab->p.pIndex); 16633 } 16634 else if( n==2 && 0==sqlite3_strnicmp("id", z, n) ){ 16635 pCsr->iSpecial = pCsr->iCsrId; 16636 } 16637 else{ 16638 /* An unrecognized directive. Return an error message. 
*/ 16639 pTab->p.base.zErrMsg = sqlite3_mprintf("unknown special query: %.*s", n, z); 16640 rc = SQLITE_ERROR; 16641 } 16642 16643 return rc; 16644 } 16645 16646 /* 16647 ** Search for an auxiliary function named zName that can be used with table 16648 ** pTab. If one is found, return a pointer to the corresponding Fts5Auxiliary 16649 ** structure. Otherwise, if no such function exists, return NULL. 16650 */ 16651 static Fts5Auxiliary *fts5FindAuxiliary(Fts5FullTable *pTab, const char *zName){ 16652 Fts5Auxiliary *pAux; 16653 16654 for(pAux=pTab->pGlobal->pAux; pAux; pAux=pAux->pNext){ 16655 if( sqlite3_stricmp(zName, pAux->zFunc)==0 ) return pAux; 16656 } 16657 16658 /* No function of the specified name was found. Return 0. */ 16659 return 0; 16660 } 16661 16662 16663 static int fts5FindRankFunction(Fts5Cursor *pCsr){ 16664 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 16665 Fts5Config *pConfig = pTab->p.pConfig; 16666 int rc = SQLITE_OK; 16667 Fts5Auxiliary *pAux = 0; 16668 const char *zRank = pCsr->zRank; 16669 const char *zRankArgs = pCsr->zRankArgs; 16670 16671 if( zRankArgs ){ 16672 char *zSql = sqlite3Fts5Mprintf(&rc, "SELECT %s", zRankArgs); 16673 if( zSql ){ 16674 sqlite3_stmt *pStmt = 0; 16675 rc = sqlite3_prepare_v3(pConfig->db, zSql, -1, 16676 SQLITE_PREPARE_PERSISTENT, &pStmt, 0); 16677 sqlite3_free(zSql); 16678 assert( rc==SQLITE_OK || pCsr->pRankArgStmt==0 ); 16679 if( rc==SQLITE_OK ){ 16680 if( SQLITE_ROW==sqlite3_step(pStmt) ){ 16681 sqlite3_int64 nByte; 16682 pCsr->nRankArg = sqlite3_column_count(pStmt); 16683 nByte = sizeof(sqlite3_value*)*pCsr->nRankArg; 16684 pCsr->apRankArg = (sqlite3_value**)sqlite3Fts5MallocZero(&rc, nByte); 16685 if( rc==SQLITE_OK ){ 16686 int i; 16687 for(i=0; i<pCsr->nRankArg; i++){ 16688 pCsr->apRankArg[i] = sqlite3_column_value(pStmt, i); 16689 } 16690 } 16691 pCsr->pRankArgStmt = pStmt; 16692 }else{ 16693 rc = sqlite3_finalize(pStmt); 16694 assert( rc!=SQLITE_OK ); 16695 } 16696 } 16697 } 16698 } 16699 
16700 if( rc==SQLITE_OK ){ 16701 pAux = fts5FindAuxiliary(pTab, zRank); 16702 if( pAux==0 ){ 16703 assert( pTab->p.base.zErrMsg==0 ); 16704 pTab->p.base.zErrMsg = sqlite3_mprintf("no such function: %s", zRank); 16705 rc = SQLITE_ERROR; 16706 } 16707 } 16708 16709 pCsr->pRank = pAux; 16710 return rc; 16711 } 16712 16713 16714 static int fts5CursorParseRank( 16715 Fts5Config *pConfig, 16716 Fts5Cursor *pCsr, 16717 sqlite3_value *pRank 16718 ){ 16719 int rc = SQLITE_OK; 16720 if( pRank ){ 16721 const char *z = (const char*)sqlite3_value_text(pRank); 16722 char *zRank = 0; 16723 char *zRankArgs = 0; 16724 16725 if( z==0 ){ 16726 if( sqlite3_value_type(pRank)==SQLITE_NULL ) rc = SQLITE_ERROR; 16727 }else{ 16728 rc = sqlite3Fts5ConfigParseRank(z, &zRank, &zRankArgs); 16729 } 16730 if( rc==SQLITE_OK ){ 16731 pCsr->zRank = zRank; 16732 pCsr->zRankArgs = zRankArgs; 16733 CsrFlagSet(pCsr, FTS5CSR_FREE_ZRANK); 16734 }else if( rc==SQLITE_ERROR ){ 16735 pCsr->base.pVtab->zErrMsg = sqlite3_mprintf( 16736 "parse error in rank function: %s", z 16737 ); 16738 } 16739 }else{ 16740 if( pConfig->zRank ){ 16741 pCsr->zRank = (char*)pConfig->zRank; 16742 pCsr->zRankArgs = (char*)pConfig->zRankArgs; 16743 }else{ 16744 pCsr->zRank = (char*)FTS5_DEFAULT_RANK; 16745 pCsr->zRankArgs = 0; 16746 } 16747 } 16748 return rc; 16749 } 16750 16751 static i64 fts5GetRowidLimit(sqlite3_value *pVal, i64 iDefault){ 16752 if( pVal ){ 16753 int eType = sqlite3_value_numeric_type(pVal); 16754 if( eType==SQLITE_INTEGER ){ 16755 return sqlite3_value_int64(pVal); 16756 } 16757 } 16758 return iDefault; 16759 } 16760 16761 /* 16762 ** This is the xFilter interface for the virtual table. See 16763 ** the virtual table xFilter method documentation for additional 16764 ** information. 16765 ** 16766 ** There are three possible query strategies: 16767 ** 16768 ** 1. Full-text search using a MATCH operator. 16769 ** 2. A by-rowid lookup. 16770 ** 3. A full-table scan. 
16771 */ 16772 static int fts5FilterMethod( 16773 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ 16774 int idxNum, /* Strategy index */ 16775 const char *idxStr, /* Unused */ 16776 int nVal, /* Number of elements in apVal */ 16777 sqlite3_value **apVal /* Arguments for the indexing scheme */ 16778 ){ 16779 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); 16780 Fts5Config *pConfig = pTab->p.pConfig; 16781 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; 16782 int rc = SQLITE_OK; /* Error code */ 16783 int bDesc; /* True if ORDER BY [rank|rowid] DESC */ 16784 int bOrderByRank; /* True if ORDER BY rank */ 16785 sqlite3_value *pRank = 0; /* rank MATCH ? expression (or NULL) */ 16786 sqlite3_value *pRowidEq = 0; /* rowid = ? expression (or NULL) */ 16787 sqlite3_value *pRowidLe = 0; /* rowid <= ? expression (or NULL) */ 16788 sqlite3_value *pRowidGe = 0; /* rowid >= ? expression (or NULL) */ 16789 int iCol; /* Column on LHS of MATCH operator */ 16790 char **pzErrmsg = pConfig->pzErrmsg; 16791 int i; 16792 int iIdxStr = 0; 16793 Fts5Expr *pExpr = 0; 16794 16795 if( pConfig->bLock ){ 16796 pTab->p.base.zErrMsg = sqlite3_mprintf( 16797 "recursively defined fts5 content table" 16798 ); 16799 return SQLITE_ERROR; 16800 } 16801 16802 if( pCsr->ePlan ){ 16803 fts5FreeCursorComponents(pCsr); 16804 memset(&pCsr->ePlan, 0, sizeof(Fts5Cursor) - ((u8*)&pCsr->ePlan-(u8*)pCsr)); 16805 } 16806 16807 assert( pCsr->pStmt==0 ); 16808 assert( pCsr->pExpr==0 ); 16809 assert( pCsr->csrflags==0 ); 16810 assert( pCsr->pRank==0 ); 16811 assert( pCsr->zRank==0 ); 16812 assert( pCsr->zRankArgs==0 ); 16813 assert( pTab->pSortCsr==0 || nVal==0 ); 16814 16815 assert( pzErrmsg==0 || pzErrmsg==&pTab->p.base.zErrMsg ); 16816 pConfig->pzErrmsg = &pTab->p.base.zErrMsg; 16817 16818 /* Decode the arguments passed through to this function. 
*/ 16819 for(i=0; i<nVal; i++){ 16820 switch( idxStr[iIdxStr++] ){ 16821 case 'r': 16822 pRank = apVal[i]; 16823 break; 16824 case 'M': { 16825 const char *zText = (const char*)sqlite3_value_text(apVal[i]); 16826 if( zText==0 ) zText = ""; 16827 iCol = 0; 16828 do{ 16829 iCol = iCol*10 + (idxStr[iIdxStr]-'0'); 16830 iIdxStr++; 16831 }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); 16832 16833 if( zText[0]=='*' ){ 16834 /* The user has issued a query of the form "MATCH '*...'". This 16835 ** indicates that the MATCH expression is not a full text query, 16836 ** but a request for an internal parameter. */ 16837 rc = fts5SpecialMatch(pTab, pCsr, &zText[1]); 16838 goto filter_out; 16839 }else{ 16840 char **pzErr = &pTab->p.base.zErrMsg; 16841 rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr); 16842 if( rc==SQLITE_OK ){ 16843 rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); 16844 pExpr = 0; 16845 } 16846 if( rc!=SQLITE_OK ) goto filter_out; 16847 } 16848 16849 break; 16850 } 16851 case 'L': 16852 case 'G': { 16853 int bGlob = (idxStr[iIdxStr-1]=='G'); 16854 const char *zText = (const char*)sqlite3_value_text(apVal[i]); 16855 iCol = 0; 16856 do{ 16857 iCol = iCol*10 + (idxStr[iIdxStr]-'0'); 16858 iIdxStr++; 16859 }while( idxStr[iIdxStr]>='0' && idxStr[iIdxStr]<='9' ); 16860 if( zText ){ 16861 rc = sqlite3Fts5ExprPattern(pConfig, bGlob, iCol, zText, &pExpr); 16862 } 16863 if( rc==SQLITE_OK ){ 16864 rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr); 16865 pExpr = 0; 16866 } 16867 if( rc!=SQLITE_OK ) goto filter_out; 16868 break; 16869 } 16870 case '=': 16871 pRowidEq = apVal[i]; 16872 break; 16873 case '<': 16874 pRowidLe = apVal[i]; 16875 break; 16876 default: assert( idxStr[iIdxStr-1]=='>' ); 16877 pRowidGe = apVal[i]; 16878 break; 16879 } 16880 } 16881 bOrderByRank = ((idxNum & FTS5_BI_ORDER_RANK) ? 1 : 0); 16882 pCsr->bDesc = bDesc = ((idxNum & FTS5_BI_ORDER_DESC) ? 1 : 0); 16883 16884 /* Set the cursor upper and lower rowid limits. 
Only some strategies 16885 ** actually use them. This is ok, as the xBestIndex() method leaves the 16886 ** sqlite3_index_constraint.omit flag clear for range constraints 16887 ** on the rowid field. */ 16888 if( pRowidEq ){ 16889 pRowidLe = pRowidGe = pRowidEq; 16890 } 16891 if( bDesc ){ 16892 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); 16893 pCsr->iLastRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); 16894 }else{ 16895 pCsr->iLastRowid = fts5GetRowidLimit(pRowidLe, LARGEST_INT64); 16896 pCsr->iFirstRowid = fts5GetRowidLimit(pRowidGe, SMALLEST_INT64); 16897 } 16898 16899 if( pTab->pSortCsr ){ 16900 /* If pSortCsr is non-NULL, then this call is being made as part of 16901 ** processing for a "... MATCH <expr> ORDER BY rank" query (ePlan is 16902 ** set to FTS5_PLAN_SORTED_MATCH). pSortCsr is the cursor that will 16903 ** return results to the user for this query. The current cursor 16904 ** (pCursor) is used to execute the query issued by function 16905 ** fts5CursorFirstSorted() above. 
*/ 16906 assert( pRowidEq==0 && pRowidLe==0 && pRowidGe==0 && pRank==0 ); 16907 assert( nVal==0 && bOrderByRank==0 && bDesc==0 ); 16908 assert( pCsr->iLastRowid==LARGEST_INT64 ); 16909 assert( pCsr->iFirstRowid==SMALLEST_INT64 ); 16910 if( pTab->pSortCsr->bDesc ){ 16911 pCsr->iLastRowid = pTab->pSortCsr->iFirstRowid; 16912 pCsr->iFirstRowid = pTab->pSortCsr->iLastRowid; 16913 }else{ 16914 pCsr->iLastRowid = pTab->pSortCsr->iLastRowid; 16915 pCsr->iFirstRowid = pTab->pSortCsr->iFirstRowid; 16916 } 16917 pCsr->ePlan = FTS5_PLAN_SOURCE; 16918 pCsr->pExpr = pTab->pSortCsr->pExpr; 16919 rc = fts5CursorFirst(pTab, pCsr, bDesc); 16920 }else if( pCsr->pExpr ){ 16921 rc = fts5CursorParseRank(pConfig, pCsr, pRank); 16922 if( rc==SQLITE_OK ){ 16923 if( bOrderByRank ){ 16924 pCsr->ePlan = FTS5_PLAN_SORTED_MATCH; 16925 rc = fts5CursorFirstSorted(pTab, pCsr, bDesc); 16926 }else{ 16927 pCsr->ePlan = FTS5_PLAN_MATCH; 16928 rc = fts5CursorFirst(pTab, pCsr, bDesc); 16929 } 16930 } 16931 }else if( pConfig->zContent==0 ){ 16932 *pConfig->pzErrmsg = sqlite3_mprintf( 16933 "%s: table does not support scanning", pConfig->zName 16934 ); 16935 rc = SQLITE_ERROR; 16936 }else{ 16937 /* This is either a full-table scan (ePlan==FTS5_PLAN_SCAN) or a lookup 16938 ** by rowid (ePlan==FTS5_PLAN_ROWID). */ 16939 pCsr->ePlan = (pRowidEq ? 
FTS5_PLAN_ROWID : FTS5_PLAN_SCAN); 16940 rc = sqlite3Fts5StorageStmt( 16941 pTab->pStorage, fts5StmtType(pCsr), &pCsr->pStmt, &pTab->p.base.zErrMsg 16942 ); 16943 if( rc==SQLITE_OK ){ 16944 if( pRowidEq!=0 ){ 16945 assert( pCsr->ePlan==FTS5_PLAN_ROWID ); 16946 sqlite3_bind_value(pCsr->pStmt, 1, pRowidEq); 16947 }else{ 16948 sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iFirstRowid); 16949 sqlite3_bind_int64(pCsr->pStmt, 2, pCsr->iLastRowid); 16950 } 16951 rc = fts5NextMethod(pCursor); 16952 } 16953 } 16954 16955 filter_out: 16956 sqlite3Fts5ExprFree(pExpr); 16957 pConfig->pzErrmsg = pzErrmsg; 16958 return rc; 16959 } 16960 16961 /* 16962 ** This is the xEof method of the virtual table. SQLite calls this 16963 ** routine to find out if it has reached the end of a result set. 16964 */ 16965 static int fts5EofMethod(sqlite3_vtab_cursor *pCursor){ 16966 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; 16967 return (CsrFlagTest(pCsr, FTS5CSR_EOF) ? 1 : 0); 16968 } 16969 16970 /* 16971 ** Return the rowid that the cursor currently points to. 16972 */ 16973 static i64 fts5CursorRowid(Fts5Cursor *pCsr){ 16974 assert( pCsr->ePlan==FTS5_PLAN_MATCH 16975 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH 16976 || pCsr->ePlan==FTS5_PLAN_SOURCE 16977 ); 16978 if( pCsr->pSorter ){ 16979 return pCsr->pSorter->iRowid; 16980 }else{ 16981 return sqlite3Fts5ExprRowid(pCsr->pExpr); 16982 } 16983 } 16984 16985 /* 16986 ** This is the xRowid method. The SQLite core calls this routine to 16987 ** retrieve the rowid for the current row of the result set. fts5 16988 ** exposes %_content.rowid as the rowid for the virtual table. The 16989 ** rowid should be written to *pRowid. 
16990 */ 16991 static int fts5RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ 16992 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; 16993 int ePlan = pCsr->ePlan; 16994 16995 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); 16996 switch( ePlan ){ 16997 case FTS5_PLAN_SPECIAL: 16998 *pRowid = 0; 16999 break; 17000 17001 case FTS5_PLAN_SOURCE: 17002 case FTS5_PLAN_MATCH: 17003 case FTS5_PLAN_SORTED_MATCH: 17004 *pRowid = fts5CursorRowid(pCsr); 17005 break; 17006 17007 default: 17008 *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); 17009 break; 17010 } 17011 17012 return SQLITE_OK; 17013 } 17014 17015 /* 17016 ** If the cursor requires seeking (bSeekRequired flag is set), seek it. 17017 ** Return SQLITE_OK if no error occurs, or an SQLite error code otherwise. 17018 ** 17019 ** If argument bErrormsg is true and an error occurs, an error message may 17020 ** be left in sqlite3_vtab.zErrMsg. 17021 */ 17022 static int fts5SeekCursor(Fts5Cursor *pCsr, int bErrormsg){ 17023 int rc = SQLITE_OK; 17024 17025 /* If the cursor does not yet have a statement handle, obtain one now. 
*/ 17026 if( pCsr->pStmt==0 ){ 17027 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 17028 int eStmt = fts5StmtType(pCsr); 17029 rc = sqlite3Fts5StorageStmt( 17030 pTab->pStorage, eStmt, &pCsr->pStmt, (bErrormsg?&pTab->p.base.zErrMsg:0) 17031 ); 17032 assert( rc!=SQLITE_OK || pTab->p.base.zErrMsg==0 ); 17033 assert( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ); 17034 } 17035 17036 if( rc==SQLITE_OK && CsrFlagTest(pCsr, FTS5CSR_REQUIRE_CONTENT) ){ 17037 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); 17038 assert( pCsr->pExpr ); 17039 sqlite3_reset(pCsr->pStmt); 17040 sqlite3_bind_int64(pCsr->pStmt, 1, fts5CursorRowid(pCsr)); 17041 pTab->pConfig->bLock++; 17042 rc = sqlite3_step(pCsr->pStmt); 17043 pTab->pConfig->bLock--; 17044 if( rc==SQLITE_ROW ){ 17045 rc = SQLITE_OK; 17046 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_CONTENT); 17047 }else{ 17048 rc = sqlite3_reset(pCsr->pStmt); 17049 if( rc==SQLITE_OK ){ 17050 rc = FTS5_CORRUPT; 17051 }else if( pTab->pConfig->pzErrmsg ){ 17052 *pTab->pConfig->pzErrmsg = sqlite3_mprintf( 17053 "%s", sqlite3_errmsg(pTab->pConfig->db) 17054 ); 17055 } 17056 } 17057 } 17058 return rc; 17059 } 17060 17061 static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ 17062 va_list ap; /* ... printf arguments */ 17063 va_start(ap, zFormat); 17064 assert( p->p.base.zErrMsg==0 ); 17065 p->p.base.zErrMsg = sqlite3_vmprintf(zFormat, ap); 17066 va_end(ap); 17067 } 17068 17069 /* 17070 ** This function is called to handle an FTS INSERT command. In other words, 17071 ** an INSERT statement of the form: 17072 ** 17073 ** INSERT INTO fts(fts) VALUES($pCmd) 17074 ** INSERT INTO fts(fts, rank) VALUES($pCmd, $pVal) 17075 ** 17076 ** Argument pVal is the value assigned to column "fts" by the INSERT 17077 ** statement. This function returns SQLITE_OK if successful, or an SQLite 17078 ** error code if an error occurs. 
17079 ** 17080 ** The commands implemented by this function are documented in the "Special 17081 ** INSERT Directives" section of the documentation. It should be updated if 17082 ** more commands are added to this function. 17083 */ 17084 static int fts5SpecialInsert( 17085 Fts5FullTable *pTab, /* Fts5 table object */ 17086 const char *zCmd, /* Text inserted into table-name column */ 17087 sqlite3_value *pVal /* Value inserted into rank column */ 17088 ){ 17089 Fts5Config *pConfig = pTab->p.pConfig; 17090 int rc = SQLITE_OK; 17091 int bError = 0; 17092 17093 if( 0==sqlite3_stricmp("delete-all", zCmd) ){ 17094 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ 17095 fts5SetVtabError(pTab, 17096 "'delete-all' may only be used with a " 17097 "contentless or external content fts5 table" 17098 ); 17099 rc = SQLITE_ERROR; 17100 }else{ 17101 rc = sqlite3Fts5StorageDeleteAll(pTab->pStorage); 17102 } 17103 }else if( 0==sqlite3_stricmp("rebuild", zCmd) ){ 17104 if( pConfig->eContent==FTS5_CONTENT_NONE ){ 17105 fts5SetVtabError(pTab, 17106 "'rebuild' may not be used with a contentless fts5 table" 17107 ); 17108 rc = SQLITE_ERROR; 17109 }else{ 17110 rc = sqlite3Fts5StorageRebuild(pTab->pStorage); 17111 } 17112 }else if( 0==sqlite3_stricmp("optimize", zCmd) ){ 17113 rc = sqlite3Fts5StorageOptimize(pTab->pStorage); 17114 }else if( 0==sqlite3_stricmp("merge", zCmd) ){ 17115 int nMerge = sqlite3_value_int(pVal); 17116 rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); 17117 }else if( 0==sqlite3_stricmp("integrity-check", zCmd) ){ 17118 int iArg = sqlite3_value_int(pVal); 17119 rc = sqlite3Fts5StorageIntegrity(pTab->pStorage, iArg); 17120 #ifdef SQLITE_DEBUG 17121 }else if( 0==sqlite3_stricmp("prefix-index", zCmd) ){ 17122 pConfig->bPrefixIndex = sqlite3_value_int(pVal); 17123 #endif 17124 }else{ 17125 rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); 17126 if( rc==SQLITE_OK ){ 17127 rc = sqlite3Fts5ConfigSetValue(pTab->p.pConfig, zCmd, pVal, &bError); 17128 } 17129 if( rc==SQLITE_OK 
){ 17130 if( bError ){ 17131 rc = SQLITE_ERROR; 17132 }else{ 17133 rc = sqlite3Fts5StorageConfigValue(pTab->pStorage, zCmd, pVal, 0); 17134 } 17135 } 17136 } 17137 return rc; 17138 } 17139 17140 static int fts5SpecialDelete( 17141 Fts5FullTable *pTab, 17142 sqlite3_value **apVal 17143 ){ 17144 int rc = SQLITE_OK; 17145 int eType1 = sqlite3_value_type(apVal[1]); 17146 if( eType1==SQLITE_INTEGER ){ 17147 sqlite3_int64 iDel = sqlite3_value_int64(apVal[1]); 17148 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, &apVal[2]); 17149 } 17150 return rc; 17151 } 17152 17153 static void fts5StorageInsert( 17154 int *pRc, 17155 Fts5FullTable *pTab, 17156 sqlite3_value **apVal, 17157 i64 *piRowid 17158 ){ 17159 int rc = *pRc; 17160 if( rc==SQLITE_OK ){ 17161 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid); 17162 } 17163 if( rc==SQLITE_OK ){ 17164 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); 17165 } 17166 *pRc = rc; 17167 } 17168 17169 /* 17170 ** This function is the implementation of the xUpdate callback used by 17171 ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be 17172 ** inserted, updated or deleted. 17173 ** 17174 ** A delete specifies a single argument - the rowid of the row to remove. 17175 ** 17176 ** Update and insert operations pass: 17177 ** 17178 ** 1. The "old" rowid, or NULL. 17179 ** 2. The "new" rowid. 17180 ** 3. Values for each of the nCol matchable columns. 17181 ** 4. Values for the two hidden columns (<tablename> and "rank"). 
17182 */ 17183 static int fts5UpdateMethod( 17184 sqlite3_vtab *pVtab, /* Virtual table handle */ 17185 int nArg, /* Size of argument array */ 17186 sqlite3_value **apVal, /* Array of arguments */ 17187 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ 17188 ){ 17189 Fts5FullTable *pTab = (Fts5FullTable*)pVtab; 17190 Fts5Config *pConfig = pTab->p.pConfig; 17191 int eType0; /* value_type() of apVal[0] */ 17192 int rc = SQLITE_OK; /* Return code */ 17193 17194 /* A transaction must be open when this is called. */ 17195 assert( pTab->ts.eState==1 ); 17196 17197 assert( pVtab->zErrMsg==0 ); 17198 assert( nArg==1 || nArg==(2+pConfig->nCol+2) ); 17199 assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER 17200 || sqlite3_value_type(apVal[0])==SQLITE_NULL 17201 ); 17202 assert( pTab->p.pConfig->pzErrmsg==0 ); 17203 pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; 17204 17205 /* Put any active cursors into REQUIRE_SEEK state. */ 17206 fts5TripCursors(pTab); 17207 17208 eType0 = sqlite3_value_type(apVal[0]); 17209 if( eType0==SQLITE_NULL 17210 && sqlite3_value_type(apVal[2+pConfig->nCol])!=SQLITE_NULL 17211 ){ 17212 /* A "special" INSERT op. These are handled separately. */ 17213 const char *z = (const char*)sqlite3_value_text(apVal[2+pConfig->nCol]); 17214 if( pConfig->eContent!=FTS5_CONTENT_NORMAL 17215 && 0==sqlite3_stricmp("delete", z) 17216 ){ 17217 rc = fts5SpecialDelete(pTab, apVal); 17218 }else{ 17219 rc = fts5SpecialInsert(pTab, z, apVal[2 + pConfig->nCol + 1]); 17220 } 17221 }else{ 17222 /* A regular INSERT, UPDATE or DELETE statement. The trick here is that 17223 ** any conflict on the rowid value must be detected before any 17224 ** modifications are made to the database file. There are 4 cases: 17225 ** 17226 ** 1) DELETE 17227 ** 2) UPDATE (rowid not modified) 17228 ** 3) UPDATE (rowid modified) 17229 ** 4) INSERT 17230 ** 17231 ** Cases 3 and 4 may violate the rowid constraint. 
17232 */ 17233 int eConflict = SQLITE_ABORT; 17234 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ 17235 eConflict = sqlite3_vtab_on_conflict(pConfig->db); 17236 } 17237 17238 assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); 17239 assert( nArg!=1 || eType0==SQLITE_INTEGER ); 17240 17241 /* Filter out attempts to run UPDATE or DELETE on contentless tables. 17242 ** This is not suported. */ 17243 if( eType0==SQLITE_INTEGER && fts5IsContentless(pTab) ){ 17244 pTab->p.base.zErrMsg = sqlite3_mprintf( 17245 "cannot %s contentless fts5 table: %s", 17246 (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName 17247 ); 17248 rc = SQLITE_ERROR; 17249 } 17250 17251 /* DELETE */ 17252 else if( nArg==1 ){ 17253 i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ 17254 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0); 17255 } 17256 17257 /* INSERT or UPDATE */ 17258 else{ 17259 int eType1 = sqlite3_value_numeric_type(apVal[1]); 17260 17261 if( eType1!=SQLITE_INTEGER && eType1!=SQLITE_NULL ){ 17262 rc = SQLITE_MISMATCH; 17263 } 17264 17265 else if( eType0!=SQLITE_INTEGER ){ 17266 /* If this is a REPLACE, first remove the current entry (if any) */ 17267 if( eConflict==SQLITE_REPLACE && eType1==SQLITE_INTEGER ){ 17268 i64 iNew = sqlite3_value_int64(apVal[1]); /* Rowid to delete */ 17269 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0); 17270 } 17271 fts5StorageInsert(&rc, pTab, apVal, pRowid); 17272 } 17273 17274 /* UPDATE */ 17275 else{ 17276 i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */ 17277 i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */ 17278 if( eType1==SQLITE_INTEGER && iOld!=iNew ){ 17279 if( eConflict==SQLITE_REPLACE ){ 17280 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); 17281 if( rc==SQLITE_OK ){ 17282 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0); 17283 } 17284 fts5StorageInsert(&rc, pTab, apVal, pRowid); 17285 }else{ 17286 rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, pRowid); 17287 if( 
rc==SQLITE_OK ){ 17288 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); 17289 } 17290 if( rc==SQLITE_OK ){ 17291 rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid); 17292 } 17293 } 17294 }else{ 17295 rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0); 17296 fts5StorageInsert(&rc, pTab, apVal, pRowid); 17297 } 17298 } 17299 } 17300 } 17301 17302 pTab->p.pConfig->pzErrmsg = 0; 17303 return rc; 17304 } 17305 17306 /* 17307 ** Implementation of xSync() method. 17308 */ 17309 static int fts5SyncMethod(sqlite3_vtab *pVtab){ 17310 int rc; 17311 Fts5FullTable *pTab = (Fts5FullTable*)pVtab; 17312 fts5CheckTransactionState(pTab, FTS5_SYNC, 0); 17313 pTab->p.pConfig->pzErrmsg = &pTab->p.base.zErrMsg; 17314 fts5TripCursors(pTab); 17315 rc = sqlite3Fts5StorageSync(pTab->pStorage); 17316 pTab->p.pConfig->pzErrmsg = 0; 17317 return rc; 17318 } 17319 17320 /* 17321 ** Implementation of xBegin() method. 17322 */ 17323 static int fts5BeginMethod(sqlite3_vtab *pVtab){ 17324 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_BEGIN, 0); 17325 fts5NewTransaction((Fts5FullTable*)pVtab); 17326 return SQLITE_OK; 17327 } 17328 17329 /* 17330 ** Implementation of xCommit() method. This is a no-op. The contents of 17331 ** the pending-terms hash-table have already been flushed into the database 17332 ** by fts5SyncMethod(). 17333 */ 17334 static int fts5CommitMethod(sqlite3_vtab *pVtab){ 17335 UNUSED_PARAM(pVtab); /* Call below is a no-op for NDEBUG builds */ 17336 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_COMMIT, 0); 17337 return SQLITE_OK; 17338 } 17339 17340 /* 17341 ** Implementation of xRollback(). Discard the contents of the pending-terms 17342 ** hash-table. Any changes made to the database are reverted by SQLite. 
17343 */ 17344 static int fts5RollbackMethod(sqlite3_vtab *pVtab){ 17345 int rc; 17346 Fts5FullTable *pTab = (Fts5FullTable*)pVtab; 17347 fts5CheckTransactionState(pTab, FTS5_ROLLBACK, 0); 17348 rc = sqlite3Fts5StorageRollback(pTab->pStorage); 17349 return rc; 17350 } 17351 17352 static int fts5CsrPoslist(Fts5Cursor*, int, const u8**, int*); 17353 17354 static void *fts5ApiUserData(Fts5Context *pCtx){ 17355 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17356 return pCsr->pAux->pUserData; 17357 } 17358 17359 static int fts5ApiColumnCount(Fts5Context *pCtx){ 17360 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17361 return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; 17362 } 17363 17364 static int fts5ApiColumnTotalSize( 17365 Fts5Context *pCtx, 17366 int iCol, 17367 sqlite3_int64 *pnToken 17368 ){ 17369 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17370 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 17371 return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); 17372 } 17373 17374 static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ 17375 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17376 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 17377 return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); 17378 } 17379 17380 static int fts5ApiTokenize( 17381 Fts5Context *pCtx, 17382 const char *pText, int nText, 17383 void *pUserData, 17384 int (*xToken)(void*, int, const char*, int, int, int) 17385 ){ 17386 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17387 Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); 17388 return sqlite3Fts5Tokenize( 17389 pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken 17390 ); 17391 } 17392 17393 static int fts5ApiPhraseCount(Fts5Context *pCtx){ 17394 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17395 return sqlite3Fts5ExprPhraseCount(pCsr->pExpr); 17396 } 17397 17398 static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ 17399 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17400 return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); 17401 
} 17402 17403 static int fts5ApiColumnText( 17404 Fts5Context *pCtx, 17405 int iCol, 17406 const char **pz, 17407 int *pn 17408 ){ 17409 int rc = SQLITE_OK; 17410 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17411 if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab)) 17412 || pCsr->ePlan==FTS5_PLAN_SPECIAL 17413 ){ 17414 *pz = 0; 17415 *pn = 0; 17416 }else{ 17417 rc = fts5SeekCursor(pCsr, 0); 17418 if( rc==SQLITE_OK ){ 17419 *pz = (const char*)sqlite3_column_text(pCsr->pStmt, iCol+1); 17420 *pn = sqlite3_column_bytes(pCsr->pStmt, iCol+1); 17421 } 17422 } 17423 return rc; 17424 } 17425 17426 static int fts5CsrPoslist( 17427 Fts5Cursor *pCsr, 17428 int iPhrase, 17429 const u8 **pa, 17430 int *pn 17431 ){ 17432 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; 17433 int rc = SQLITE_OK; 17434 int bLive = (pCsr->pSorter==0); 17435 17436 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_POSLIST) ){ 17437 17438 if( pConfig->eDetail!=FTS5_DETAIL_FULL ){ 17439 Fts5PoslistPopulator *aPopulator; 17440 int i; 17441 aPopulator = sqlite3Fts5ExprClearPoslists(pCsr->pExpr, bLive); 17442 if( aPopulator==0 ) rc = SQLITE_NOMEM; 17443 for(i=0; i<pConfig->nCol && rc==SQLITE_OK; i++){ 17444 int n; const char *z; 17445 rc = fts5ApiColumnText((Fts5Context*)pCsr, i, &z, &n); 17446 if( rc==SQLITE_OK ){ 17447 rc = sqlite3Fts5ExprPopulatePoslists( 17448 pConfig, pCsr->pExpr, aPopulator, i, z, n 17449 ); 17450 } 17451 } 17452 sqlite3_free(aPopulator); 17453 17454 if( pCsr->pSorter ){ 17455 sqlite3Fts5ExprCheckPoslists(pCsr->pExpr, pCsr->pSorter->iRowid); 17456 } 17457 } 17458 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_POSLIST); 17459 } 17460 17461 if( pCsr->pSorter && pConfig->eDetail==FTS5_DETAIL_FULL ){ 17462 Fts5Sorter *pSorter = pCsr->pSorter; 17463 int i1 = (iPhrase==0 ? 
0 : pSorter->aIdx[iPhrase-1]); 17464 *pn = pSorter->aIdx[iPhrase] - i1; 17465 *pa = &pSorter->aPoslist[i1]; 17466 }else{ 17467 *pn = sqlite3Fts5ExprPoslist(pCsr->pExpr, iPhrase, pa); 17468 } 17469 17470 return rc; 17471 } 17472 17473 /* 17474 ** Ensure that the Fts5Cursor.nInstCount and aInst[] variables are populated 17475 ** correctly for the current view. Return SQLITE_OK if successful, or an 17476 ** SQLite error code otherwise. 17477 */ 17478 static int fts5CacheInstArray(Fts5Cursor *pCsr){ 17479 int rc = SQLITE_OK; 17480 Fts5PoslistReader *aIter; /* One iterator for each phrase */ 17481 int nIter; /* Number of iterators/phrases */ 17482 int nCol = ((Fts5Table*)pCsr->base.pVtab)->pConfig->nCol; 17483 17484 nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); 17485 if( pCsr->aInstIter==0 ){ 17486 sqlite3_int64 nByte = sizeof(Fts5PoslistReader) * nIter; 17487 pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte); 17488 } 17489 aIter = pCsr->aInstIter; 17490 17491 if( aIter ){ 17492 int nInst = 0; /* Number instances seen so far */ 17493 int i; 17494 17495 /* Initialize all iterators */ 17496 for(i=0; i<nIter && rc==SQLITE_OK; i++){ 17497 const u8 *a; 17498 int n; 17499 rc = fts5CsrPoslist(pCsr, i, &a, &n); 17500 if( rc==SQLITE_OK ){ 17501 sqlite3Fts5PoslistReaderInit(a, n, &aIter[i]); 17502 } 17503 } 17504 17505 if( rc==SQLITE_OK ){ 17506 while( 1 ){ 17507 int *aInst; 17508 int iBest = -1; 17509 for(i=0; i<nIter; i++){ 17510 if( (aIter[i].bEof==0) 17511 && (iBest<0 || aIter[i].iPos<aIter[iBest].iPos) 17512 ){ 17513 iBest = i; 17514 } 17515 } 17516 if( iBest<0 ) break; 17517 17518 nInst++; 17519 if( nInst>=pCsr->nInstAlloc ){ 17520 int nNewSize = pCsr->nInstAlloc ? 
pCsr->nInstAlloc*2 : 32; 17521 aInst = (int*)sqlite3_realloc64( 17522 pCsr->aInst, nNewSize*sizeof(int)*3 17523 ); 17524 if( aInst ){ 17525 pCsr->aInst = aInst; 17526 pCsr->nInstAlloc = nNewSize; 17527 }else{ 17528 nInst--; 17529 rc = SQLITE_NOMEM; 17530 break; 17531 } 17532 } 17533 17534 aInst = &pCsr->aInst[3 * (nInst-1)]; 17535 aInst[0] = iBest; 17536 aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos); 17537 aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos); 17538 if( aInst[1]<0 || aInst[1]>=nCol ){ 17539 rc = FTS5_CORRUPT; 17540 break; 17541 } 17542 sqlite3Fts5PoslistReaderNext(&aIter[iBest]); 17543 } 17544 } 17545 17546 pCsr->nInstCount = nInst; 17547 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST); 17548 } 17549 return rc; 17550 } 17551 17552 static int fts5ApiInstCount(Fts5Context *pCtx, int *pnInst){ 17553 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17554 int rc = SQLITE_OK; 17555 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 17556 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) ){ 17557 *pnInst = pCsr->nInstCount; 17558 } 17559 return rc; 17560 } 17561 17562 static int fts5ApiInst( 17563 Fts5Context *pCtx, 17564 int iIdx, 17565 int *piPhrase, 17566 int *piCol, 17567 int *piOff 17568 ){ 17569 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17570 int rc = SQLITE_OK; 17571 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST)==0 17572 || SQLITE_OK==(rc = fts5CacheInstArray(pCsr)) 17573 ){ 17574 if( iIdx<0 || iIdx>=pCsr->nInstCount ){ 17575 rc = SQLITE_RANGE; 17576 #if 0 17577 }else if( fts5IsOffsetless((Fts5Table*)pCsr->base.pVtab) ){ 17578 *piPhrase = pCsr->aInst[iIdx*3]; 17579 *piCol = pCsr->aInst[iIdx*3 + 2]; 17580 *piOff = -1; 17581 #endif 17582 }else{ 17583 *piPhrase = pCsr->aInst[iIdx*3]; 17584 *piCol = pCsr->aInst[iIdx*3 + 1]; 17585 *piOff = pCsr->aInst[iIdx*3 + 2]; 17586 } 17587 } 17588 return rc; 17589 } 17590 17591 static sqlite3_int64 fts5ApiRowid(Fts5Context *pCtx){ 17592 return fts5CursorRowid((Fts5Cursor*)pCtx); 17593 } 17594 17595 static int fts5ColumnSizeCb( 17596 void *pContext, /* 
Pointer to int */ 17597 int tflags, 17598 const char *pUnused, /* Buffer containing token */ 17599 int nUnused, /* Size of token in bytes */ 17600 int iUnused1, /* Start offset of token */ 17601 int iUnused2 /* End offset of token */ 17602 ){ 17603 int *pCnt = (int*)pContext; 17604 UNUSED_PARAM2(pUnused, nUnused); 17605 UNUSED_PARAM2(iUnused1, iUnused2); 17606 if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){ 17607 (*pCnt)++; 17608 } 17609 return SQLITE_OK; 17610 } 17611 17612 static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ 17613 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17614 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 17615 Fts5Config *pConfig = pTab->p.pConfig; 17616 int rc = SQLITE_OK; 17617 17618 if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_DOCSIZE) ){ 17619 if( pConfig->bColumnsize ){ 17620 i64 iRowid = fts5CursorRowid(pCsr); 17621 rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); 17622 }else if( pConfig->zContent==0 ){ 17623 int i; 17624 for(i=0; i<pConfig->nCol; i++){ 17625 if( pConfig->abUnindexed[i]==0 ){ 17626 pCsr->aColumnSize[i] = -1; 17627 } 17628 } 17629 }else{ 17630 int i; 17631 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ 17632 if( pConfig->abUnindexed[i]==0 ){ 17633 const char *z; int n; 17634 void *p = (void*)(&pCsr->aColumnSize[i]); 17635 pCsr->aColumnSize[i] = 0; 17636 rc = fts5ApiColumnText(pCtx, i, &z, &n); 17637 if( rc==SQLITE_OK ){ 17638 rc = sqlite3Fts5Tokenize( 17639 pConfig, FTS5_TOKENIZE_AUX, z, n, p, fts5ColumnSizeCb 17640 ); 17641 } 17642 } 17643 } 17644 } 17645 CsrFlagClear(pCsr, FTS5CSR_REQUIRE_DOCSIZE); 17646 } 17647 if( iCol<0 ){ 17648 int i; 17649 *pnToken = 0; 17650 for(i=0; i<pConfig->nCol; i++){ 17651 *pnToken += pCsr->aColumnSize[i]; 17652 } 17653 }else if( iCol<pConfig->nCol ){ 17654 *pnToken = pCsr->aColumnSize[iCol]; 17655 }else{ 17656 *pnToken = 0; 17657 rc = SQLITE_RANGE; 17658 } 17659 return rc; 17660 } 17661 17662 /* 17663 ** Implementation of the xSetAuxdata() method. 
17664 */ 17665 static int fts5ApiSetAuxdata( 17666 Fts5Context *pCtx, /* Fts5 context */ 17667 void *pPtr, /* Pointer to save as auxdata */ 17668 void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ 17669 ){ 17670 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17671 Fts5Auxdata *pData; 17672 17673 /* Search through the cursors list of Fts5Auxdata objects for one that 17674 ** corresponds to the currently executing auxiliary function. */ 17675 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ 17676 if( pData->pAux==pCsr->pAux ) break; 17677 } 17678 17679 if( pData ){ 17680 if( pData->xDelete ){ 17681 pData->xDelete(pData->pPtr); 17682 } 17683 }else{ 17684 int rc = SQLITE_OK; 17685 pData = (Fts5Auxdata*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Auxdata)); 17686 if( pData==0 ){ 17687 if( xDelete ) xDelete(pPtr); 17688 return rc; 17689 } 17690 pData->pAux = pCsr->pAux; 17691 pData->pNext = pCsr->pAuxdata; 17692 pCsr->pAuxdata = pData; 17693 } 17694 17695 pData->xDelete = xDelete; 17696 pData->pPtr = pPtr; 17697 return SQLITE_OK; 17698 } 17699 17700 static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ 17701 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17702 Fts5Auxdata *pData; 17703 void *pRet = 0; 17704 17705 for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ 17706 if( pData->pAux==pCsr->pAux ) break; 17707 } 17708 17709 if( pData ){ 17710 pRet = pData->pPtr; 17711 if( bClear ){ 17712 pData->pPtr = 0; 17713 pData->xDelete = 0; 17714 } 17715 } 17716 17717 return pRet; 17718 } 17719 17720 static void fts5ApiPhraseNext( 17721 Fts5Context *pUnused, 17722 Fts5PhraseIter *pIter, 17723 int *piCol, int *piOff 17724 ){ 17725 UNUSED_PARAM(pUnused); 17726 if( pIter->a>=pIter->b ){ 17727 *piCol = -1; 17728 *piOff = -1; 17729 }else{ 17730 int iVal; 17731 pIter->a += fts5GetVarint32(pIter->a, iVal); 17732 if( iVal==1 ){ 17733 pIter->a += fts5GetVarint32(pIter->a, iVal); 17734 *piCol = iVal; 17735 *piOff = 0; 17736 pIter->a += fts5GetVarint32(pIter->a, iVal); 17737 } 17738 *piOff 
+= (iVal-2); 17739 } 17740 } 17741 17742 static int fts5ApiPhraseFirst( 17743 Fts5Context *pCtx, 17744 int iPhrase, 17745 Fts5PhraseIter *pIter, 17746 int *piCol, int *piOff 17747 ){ 17748 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17749 int n; 17750 int rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); 17751 if( rc==SQLITE_OK ){ 17752 assert( pIter->a || n==0 ); 17753 pIter->b = (pIter->a ? &pIter->a[n] : 0); 17754 *piCol = 0; 17755 *piOff = 0; 17756 fts5ApiPhraseNext(pCtx, pIter, piCol, piOff); 17757 } 17758 return rc; 17759 } 17760 17761 static void fts5ApiPhraseNextColumn( 17762 Fts5Context *pCtx, 17763 Fts5PhraseIter *pIter, 17764 int *piCol 17765 ){ 17766 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17767 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; 17768 17769 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ 17770 if( pIter->a>=pIter->b ){ 17771 *piCol = -1; 17772 }else{ 17773 int iIncr; 17774 pIter->a += fts5GetVarint32(&pIter->a[0], iIncr); 17775 *piCol += (iIncr-2); 17776 } 17777 }else{ 17778 while( 1 ){ 17779 int dummy; 17780 if( pIter->a>=pIter->b ){ 17781 *piCol = -1; 17782 return; 17783 } 17784 if( pIter->a[0]==0x01 ) break; 17785 pIter->a += fts5GetVarint32(pIter->a, dummy); 17786 } 17787 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol); 17788 } 17789 } 17790 17791 static int fts5ApiPhraseFirstColumn( 17792 Fts5Context *pCtx, 17793 int iPhrase, 17794 Fts5PhraseIter *pIter, 17795 int *piCol 17796 ){ 17797 int rc = SQLITE_OK; 17798 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17799 Fts5Config *pConfig = ((Fts5Table*)(pCsr->base.pVtab))->pConfig; 17800 17801 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ 17802 Fts5Sorter *pSorter = pCsr->pSorter; 17803 int n; 17804 if( pSorter ){ 17805 int i1 = (iPhrase==0 ? 
0 : pSorter->aIdx[iPhrase-1]); 17806 n = pSorter->aIdx[iPhrase] - i1; 17807 pIter->a = &pSorter->aPoslist[i1]; 17808 }else{ 17809 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, iPhrase, &pIter->a, &n); 17810 } 17811 if( rc==SQLITE_OK ){ 17812 assert( pIter->a || n==0 ); 17813 pIter->b = (pIter->a ? &pIter->a[n] : 0); 17814 *piCol = 0; 17815 fts5ApiPhraseNextColumn(pCtx, pIter, piCol); 17816 } 17817 }else{ 17818 int n; 17819 rc = fts5CsrPoslist(pCsr, iPhrase, &pIter->a, &n); 17820 if( rc==SQLITE_OK ){ 17821 assert( pIter->a || n==0 ); 17822 pIter->b = (pIter->a ? &pIter->a[n] : 0); 17823 if( n<=0 ){ 17824 *piCol = -1; 17825 }else if( pIter->a[0]==0x01 ){ 17826 pIter->a += 1 + fts5GetVarint32(&pIter->a[1], *piCol); 17827 }else{ 17828 *piCol = 0; 17829 } 17830 } 17831 } 17832 17833 return rc; 17834 } 17835 17836 17837 static int fts5ApiQueryPhrase(Fts5Context*, int, void*, 17838 int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) 17839 ); 17840 17841 static const Fts5ExtensionApi sFts5Api = { 17842 2, /* iVersion */ 17843 fts5ApiUserData, 17844 fts5ApiColumnCount, 17845 fts5ApiRowCount, 17846 fts5ApiColumnTotalSize, 17847 fts5ApiTokenize, 17848 fts5ApiPhraseCount, 17849 fts5ApiPhraseSize, 17850 fts5ApiInstCount, 17851 fts5ApiInst, 17852 fts5ApiRowid, 17853 fts5ApiColumnText, 17854 fts5ApiColumnSize, 17855 fts5ApiQueryPhrase, 17856 fts5ApiSetAuxdata, 17857 fts5ApiGetAuxdata, 17858 fts5ApiPhraseFirst, 17859 fts5ApiPhraseNext, 17860 fts5ApiPhraseFirstColumn, 17861 fts5ApiPhraseNextColumn, 17862 }; 17863 17864 /* 17865 ** Implementation of API function xQueryPhrase(). 
17866 */ 17867 static int fts5ApiQueryPhrase( 17868 Fts5Context *pCtx, 17869 int iPhrase, 17870 void *pUserData, 17871 int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) 17872 ){ 17873 Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; 17874 Fts5FullTable *pTab = (Fts5FullTable*)(pCsr->base.pVtab); 17875 int rc; 17876 Fts5Cursor *pNew = 0; 17877 17878 rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); 17879 if( rc==SQLITE_OK ){ 17880 pNew->ePlan = FTS5_PLAN_MATCH; 17881 pNew->iFirstRowid = SMALLEST_INT64; 17882 pNew->iLastRowid = LARGEST_INT64; 17883 pNew->base.pVtab = (sqlite3_vtab*)pTab; 17884 rc = sqlite3Fts5ExprClonePhrase(pCsr->pExpr, iPhrase, &pNew->pExpr); 17885 } 17886 17887 if( rc==SQLITE_OK ){ 17888 for(rc = fts5CursorFirst(pTab, pNew, 0); 17889 rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; 17890 rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) 17891 ){ 17892 rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); 17893 if( rc!=SQLITE_OK ){ 17894 if( rc==SQLITE_DONE ) rc = SQLITE_OK; 17895 break; 17896 } 17897 } 17898 } 17899 17900 fts5CloseMethod((sqlite3_vtab_cursor*)pNew); 17901 return rc; 17902 } 17903 17904 static void fts5ApiInvoke( 17905 Fts5Auxiliary *pAux, 17906 Fts5Cursor *pCsr, 17907 sqlite3_context *context, 17908 int argc, 17909 sqlite3_value **argv 17910 ){ 17911 assert( pCsr->pAux==0 ); 17912 pCsr->pAux = pAux; 17913 pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc, argv); 17914 pCsr->pAux = 0; 17915 } 17916 17917 static Fts5Cursor *fts5CursorFromCsrid(Fts5Global *pGlobal, i64 iCsrId){ 17918 Fts5Cursor *pCsr; 17919 for(pCsr=pGlobal->pCsr; pCsr; pCsr=pCsr->pNext){ 17920 if( pCsr->iCsrId==iCsrId ) break; 17921 } 17922 return pCsr; 17923 } 17924 17925 static void fts5ApiCallback( 17926 sqlite3_context *context, 17927 int argc, 17928 sqlite3_value **argv 17929 ){ 17930 17931 Fts5Auxiliary *pAux; 17932 Fts5Cursor *pCsr; 17933 i64 iCsrId; 17934 17935 assert( argc>=1 ); 17936 pAux = 
(Fts5Auxiliary*)sqlite3_user_data(context); 17937 iCsrId = sqlite3_value_int64(argv[0]); 17938 17939 pCsr = fts5CursorFromCsrid(pAux->pGlobal, iCsrId); 17940 if( pCsr==0 || pCsr->ePlan==0 ){ 17941 char *zErr = sqlite3_mprintf("no such cursor: %lld", iCsrId); 17942 sqlite3_result_error(context, zErr, -1); 17943 sqlite3_free(zErr); 17944 }else{ 17945 fts5ApiInvoke(pAux, pCsr, context, argc-1, &argv[1]); 17946 } 17947 } 17948 17949 17950 /* 17951 ** Given cursor id iId, return a pointer to the corresponding Fts5Table 17952 ** object. Or NULL If the cursor id does not exist. 17953 */ 17954 static Fts5Table *sqlite3Fts5TableFromCsrid( 17955 Fts5Global *pGlobal, /* FTS5 global context for db handle */ 17956 i64 iCsrId /* Id of cursor to find */ 17957 ){ 17958 Fts5Cursor *pCsr; 17959 pCsr = fts5CursorFromCsrid(pGlobal, iCsrId); 17960 if( pCsr ){ 17961 return (Fts5Table*)pCsr->base.pVtab; 17962 } 17963 return 0; 17964 } 17965 17966 /* 17967 ** Return a "position-list blob" corresponding to the current position of 17968 ** cursor pCsr via sqlite3_result_blob(). A position-list blob contains 17969 ** the current position-list for each phrase in the query associated with 17970 ** cursor pCsr. 17971 ** 17972 ** A position-list blob begins with (nPhrase-1) varints, where nPhrase is 17973 ** the number of phrases in the query. Following the varints are the 17974 ** concatenated position lists for each phrase, in order. 17975 ** 17976 ** The first varint (if it exists) contains the size of the position list 17977 ** for phrase 0. The second (same disclaimer) contains the size of position 17978 ** list 1. And so on. There is no size field for the final position list, 17979 ** as it can be derived from the total size of the blob. 
17980 */ 17981 static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ 17982 int i; 17983 int rc = SQLITE_OK; 17984 int nPhrase = sqlite3Fts5ExprPhraseCount(pCsr->pExpr); 17985 Fts5Buffer val; 17986 17987 memset(&val, 0, sizeof(Fts5Buffer)); 17988 switch( ((Fts5Table*)(pCsr->base.pVtab))->pConfig->eDetail ){ 17989 case FTS5_DETAIL_FULL: 17990 17991 /* Append the varints */ 17992 for(i=0; i<(nPhrase-1); i++){ 17993 const u8 *dummy; 17994 int nByte = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &dummy); 17995 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); 17996 } 17997 17998 /* Append the position lists */ 17999 for(i=0; i<nPhrase; i++){ 18000 const u8 *pPoslist; 18001 int nPoslist; 18002 nPoslist = sqlite3Fts5ExprPoslist(pCsr->pExpr, i, &pPoslist); 18003 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); 18004 } 18005 break; 18006 18007 case FTS5_DETAIL_COLUMNS: 18008 18009 /* Append the varints */ 18010 for(i=0; rc==SQLITE_OK && i<(nPhrase-1); i++){ 18011 const u8 *dummy; 18012 int nByte; 18013 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &dummy, &nByte); 18014 sqlite3Fts5BufferAppendVarint(&rc, &val, nByte); 18015 } 18016 18017 /* Append the position lists */ 18018 for(i=0; rc==SQLITE_OK && i<nPhrase; i++){ 18019 const u8 *pPoslist; 18020 int nPoslist; 18021 rc = sqlite3Fts5ExprPhraseCollist(pCsr->pExpr, i, &pPoslist, &nPoslist); 18022 sqlite3Fts5BufferAppendBlob(&rc, &val, nPoslist, pPoslist); 18023 } 18024 break; 18025 18026 default: 18027 break; 18028 } 18029 18030 sqlite3_result_blob(pCtx, val.p, val.n, sqlite3_free); 18031 return rc; 18032 } 18033 18034 /* 18035 ** This is the xColumn method, called by SQLite to request a value from 18036 ** the row that the supplied cursor currently points to. 
18037 */ 18038 static int fts5ColumnMethod( 18039 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ 18040 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ 18041 int iCol /* Index of column to read value from */ 18042 ){ 18043 Fts5FullTable *pTab = (Fts5FullTable*)(pCursor->pVtab); 18044 Fts5Config *pConfig = pTab->p.pConfig; 18045 Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; 18046 int rc = SQLITE_OK; 18047 18048 assert( CsrFlagTest(pCsr, FTS5CSR_EOF)==0 ); 18049 18050 if( pCsr->ePlan==FTS5_PLAN_SPECIAL ){ 18051 if( iCol==pConfig->nCol ){ 18052 sqlite3_result_int64(pCtx, pCsr->iSpecial); 18053 } 18054 }else 18055 18056 if( iCol==pConfig->nCol ){ 18057 /* User is requesting the value of the special column with the same name 18058 ** as the table. Return the cursor integer id number. This value is only 18059 ** useful in that it may be passed as the first argument to an FTS5 18060 ** auxiliary function. */ 18061 sqlite3_result_int64(pCtx, pCsr->iCsrId); 18062 }else if( iCol==pConfig->nCol+1 ){ 18063 18064 /* The value of the "rank" column. */ 18065 if( pCsr->ePlan==FTS5_PLAN_SOURCE ){ 18066 fts5PoslistBlob(pCtx, pCsr); 18067 }else if( 18068 pCsr->ePlan==FTS5_PLAN_MATCH 18069 || pCsr->ePlan==FTS5_PLAN_SORTED_MATCH 18070 ){ 18071 if( pCsr->pRank || SQLITE_OK==(rc = fts5FindRankFunction(pCsr)) ){ 18072 fts5ApiInvoke(pCsr->pRank, pCsr, pCtx, pCsr->nRankArg, pCsr->apRankArg); 18073 } 18074 } 18075 }else if( !fts5IsContentless(pTab) ){ 18076 pConfig->pzErrmsg = &pTab->p.base.zErrMsg; 18077 rc = fts5SeekCursor(pCsr, 1); 18078 if( rc==SQLITE_OK ){ 18079 sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); 18080 } 18081 pConfig->pzErrmsg = 0; 18082 } 18083 return rc; 18084 } 18085 18086 18087 /* 18088 ** This routine implements the xFindFunction method for the FTS3 18089 ** virtual table. 
18090 */ 18091 static int fts5FindFunctionMethod( 18092 sqlite3_vtab *pVtab, /* Virtual table handle */ 18093 int nUnused, /* Number of SQL function arguments */ 18094 const char *zName, /* Name of SQL function */ 18095 void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), /* OUT: Result */ 18096 void **ppArg /* OUT: User data for *pxFunc */ 18097 ){ 18098 Fts5FullTable *pTab = (Fts5FullTable*)pVtab; 18099 Fts5Auxiliary *pAux; 18100 18101 UNUSED_PARAM(nUnused); 18102 pAux = fts5FindAuxiliary(pTab, zName); 18103 if( pAux ){ 18104 *pxFunc = fts5ApiCallback; 18105 *ppArg = (void*)pAux; 18106 return 1; 18107 } 18108 18109 /* No function of the specified name was found. Return 0. */ 18110 return 0; 18111 } 18112 18113 /* 18114 ** Implementation of FTS5 xRename method. Rename an fts5 table. 18115 */ 18116 static int fts5RenameMethod( 18117 sqlite3_vtab *pVtab, /* Virtual table handle */ 18118 const char *zName /* New name of table */ 18119 ){ 18120 Fts5FullTable *pTab = (Fts5FullTable*)pVtab; 18121 return sqlite3Fts5StorageRename(pTab->pStorage, zName); 18122 } 18123 18124 static int sqlite3Fts5FlushToDisk(Fts5Table *pTab){ 18125 fts5TripCursors((Fts5FullTable*)pTab); 18126 return sqlite3Fts5StorageSync(((Fts5FullTable*)pTab)->pStorage); 18127 } 18128 18129 /* 18130 ** The xSavepoint() method. 18131 ** 18132 ** Flush the contents of the pending-terms table to disk. 18133 */ 18134 static int fts5SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ 18135 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ 18136 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_SAVEPOINT, iSavepoint); 18137 return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab); 18138 } 18139 18140 /* 18141 ** The xRelease() method. 18142 ** 18143 ** This is a no-op. 
18144 */ 18145 static int fts5ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ 18146 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ 18147 fts5CheckTransactionState((Fts5FullTable*)pVtab, FTS5_RELEASE, iSavepoint); 18148 return sqlite3Fts5FlushToDisk((Fts5Table*)pVtab); 18149 } 18150 18151 /* 18152 ** The xRollbackTo() method. 18153 ** 18154 ** Discard the contents of the pending terms table. 18155 */ 18156 static int fts5RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ 18157 Fts5FullTable *pTab = (Fts5FullTable*)pVtab; 18158 UNUSED_PARAM(iSavepoint); /* Call below is a no-op for NDEBUG builds */ 18159 fts5CheckTransactionState(pTab, FTS5_ROLLBACKTO, iSavepoint); 18160 fts5TripCursors(pTab); 18161 return sqlite3Fts5StorageRollback(pTab->pStorage); 18162 } 18163 18164 /* 18165 ** Register a new auxiliary function with global context pGlobal. 18166 */ 18167 static int fts5CreateAux( 18168 fts5_api *pApi, /* Global context (one per db handle) */ 18169 const char *zName, /* Name of new function */ 18170 void *pUserData, /* User data for aux. function */ 18171 fts5_extension_function xFunc, /* Aux. 
function implementation */ 18172 void(*xDestroy)(void*) /* Destructor for pUserData */ 18173 ){ 18174 Fts5Global *pGlobal = (Fts5Global*)pApi; 18175 int rc = sqlite3_overload_function(pGlobal->db, zName, -1); 18176 if( rc==SQLITE_OK ){ 18177 Fts5Auxiliary *pAux; 18178 sqlite3_int64 nName; /* Size of zName in bytes, including \0 */ 18179 sqlite3_int64 nByte; /* Bytes of space to allocate */ 18180 18181 nName = strlen(zName) + 1; 18182 nByte = sizeof(Fts5Auxiliary) + nName; 18183 pAux = (Fts5Auxiliary*)sqlite3_malloc64(nByte); 18184 if( pAux ){ 18185 memset(pAux, 0, (size_t)nByte); 18186 pAux->zFunc = (char*)&pAux[1]; 18187 memcpy(pAux->zFunc, zName, nName); 18188 pAux->pGlobal = pGlobal; 18189 pAux->pUserData = pUserData; 18190 pAux->xFunc = xFunc; 18191 pAux->xDestroy = xDestroy; 18192 pAux->pNext = pGlobal->pAux; 18193 pGlobal->pAux = pAux; 18194 }else{ 18195 rc = SQLITE_NOMEM; 18196 } 18197 } 18198 18199 return rc; 18200 } 18201 18202 /* 18203 ** Register a new tokenizer. This is the implementation of the 18204 ** fts5_api.xCreateTokenizer() method. 18205 */ 18206 static int fts5CreateTokenizer( 18207 fts5_api *pApi, /* Global context (one per db handle) */ 18208 const char *zName, /* Name of new function */ 18209 void *pUserData, /* User data for aux. 
function */ 18210 fts5_tokenizer *pTokenizer, /* Tokenizer implementation */ 18211 void(*xDestroy)(void*) /* Destructor for pUserData */ 18212 ){ 18213 Fts5Global *pGlobal = (Fts5Global*)pApi; 18214 Fts5TokenizerModule *pNew; 18215 sqlite3_int64 nName; /* Size of zName and its \0 terminator */ 18216 sqlite3_int64 nByte; /* Bytes of space to allocate */ 18217 int rc = SQLITE_OK; 18218 18219 nName = strlen(zName) + 1; 18220 nByte = sizeof(Fts5TokenizerModule) + nName; 18221 pNew = (Fts5TokenizerModule*)sqlite3_malloc64(nByte); 18222 if( pNew ){ 18223 memset(pNew, 0, (size_t)nByte); 18224 pNew->zName = (char*)&pNew[1]; 18225 memcpy(pNew->zName, zName, nName); 18226 pNew->pUserData = pUserData; 18227 pNew->x = *pTokenizer; 18228 pNew->xDestroy = xDestroy; 18229 pNew->pNext = pGlobal->pTok; 18230 pGlobal->pTok = pNew; 18231 if( pNew->pNext==0 ){ 18232 pGlobal->pDfltTok = pNew; 18233 } 18234 }else{ 18235 rc = SQLITE_NOMEM; 18236 } 18237 18238 return rc; 18239 } 18240 18241 static Fts5TokenizerModule *fts5LocateTokenizer( 18242 Fts5Global *pGlobal, 18243 const char *zName 18244 ){ 18245 Fts5TokenizerModule *pMod = 0; 18246 18247 if( zName==0 ){ 18248 pMod = pGlobal->pDfltTok; 18249 }else{ 18250 for(pMod=pGlobal->pTok; pMod; pMod=pMod->pNext){ 18251 if( sqlite3_stricmp(zName, pMod->zName)==0 ) break; 18252 } 18253 } 18254 18255 return pMod; 18256 } 18257 18258 /* 18259 ** Find a tokenizer. This is the implementation of the 18260 ** fts5_api.xFindTokenizer() method. 
18261 */ 18262 static int fts5FindTokenizer( 18263 fts5_api *pApi, /* Global context (one per db handle) */ 18264 const char *zName, /* Name of new function */ 18265 void **ppUserData, 18266 fts5_tokenizer *pTokenizer /* Populate this object */ 18267 ){ 18268 int rc = SQLITE_OK; 18269 Fts5TokenizerModule *pMod; 18270 18271 pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName); 18272 if( pMod ){ 18273 *pTokenizer = pMod->x; 18274 *ppUserData = pMod->pUserData; 18275 }else{ 18276 memset(pTokenizer, 0, sizeof(fts5_tokenizer)); 18277 rc = SQLITE_ERROR; 18278 } 18279 18280 return rc; 18281 } 18282 18283 static int sqlite3Fts5GetTokenizer( 18284 Fts5Global *pGlobal, 18285 const char **azArg, 18286 int nArg, 18287 Fts5Config *pConfig, 18288 char **pzErr 18289 ){ 18290 Fts5TokenizerModule *pMod; 18291 int rc = SQLITE_OK; 18292 18293 pMod = fts5LocateTokenizer(pGlobal, nArg==0 ? 0 : azArg[0]); 18294 if( pMod==0 ){ 18295 assert( nArg>0 ); 18296 rc = SQLITE_ERROR; 18297 *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]); 18298 }else{ 18299 rc = pMod->x.xCreate( 18300 pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok 18301 ); 18302 pConfig->pTokApi = &pMod->x; 18303 if( rc!=SQLITE_OK ){ 18304 if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor"); 18305 }else{ 18306 pConfig->ePattern = sqlite3Fts5TokenizerPattern( 18307 pMod->x.xCreate, pConfig->pTok 18308 ); 18309 } 18310 } 18311 18312 if( rc!=SQLITE_OK ){ 18313 pConfig->pTokApi = 0; 18314 pConfig->pTok = 0; 18315 } 18316 18317 return rc; 18318 } 18319 18320 static void fts5ModuleDestroy(void *pCtx){ 18321 Fts5TokenizerModule *pTok, *pNextTok; 18322 Fts5Auxiliary *pAux, *pNextAux; 18323 Fts5Global *pGlobal = (Fts5Global*)pCtx; 18324 18325 for(pAux=pGlobal->pAux; pAux; pAux=pNextAux){ 18326 pNextAux = pAux->pNext; 18327 if( pAux->xDestroy ) pAux->xDestroy(pAux->pUserData); 18328 sqlite3_free(pAux); 18329 } 18330 18331 for(pTok=pGlobal->pTok; pTok; pTok=pNextTok){ 18332 pNextTok = 
pTok->pNext; 18333 if( pTok->xDestroy ) pTok->xDestroy(pTok->pUserData); 18334 sqlite3_free(pTok); 18335 } 18336 18337 sqlite3_free(pGlobal); 18338 } 18339 18340 static void fts5Fts5Func( 18341 sqlite3_context *pCtx, /* Function call context */ 18342 int nArg, /* Number of args */ 18343 sqlite3_value **apArg /* Function arguments */ 18344 ){ 18345 Fts5Global *pGlobal = (Fts5Global*)sqlite3_user_data(pCtx); 18346 fts5_api **ppApi; 18347 UNUSED_PARAM(nArg); 18348 assert( nArg==1 ); 18349 ppApi = (fts5_api**)sqlite3_value_pointer(apArg[0], "fts5_api_ptr"); 18350 if( ppApi ) *ppApi = &pGlobal->api; 18351 } 18352 18353 /* 18354 ** Implementation of fts5_source_id() function. 18355 */ 18356 static void fts5SourceIdFunc( 18357 sqlite3_context *pCtx, /* Function call context */ 18358 int nArg, /* Number of args */ 18359 sqlite3_value **apUnused /* Function arguments */ 18360 ){ 18361 assert( nArg==0 ); 18362 UNUSED_PARAM2(nArg, apUnused); 18363 sqlite3_result_text(pCtx, "fts5: 2022-11-16 12:10:08 89c459e766ea7e9165d0beeb124708b955a4950d0f4792f457465d71b158d318", -1, SQLITE_TRANSIENT); 18364 } 18365 18366 /* 18367 ** Return true if zName is the extension on one of the shadow tables used 18368 ** by this module. 
18369 */ 18370 static int fts5ShadowName(const char *zName){ 18371 static const char *azName[] = { 18372 "config", "content", "data", "docsize", "idx" 18373 }; 18374 unsigned int i; 18375 for(i=0; i<sizeof(azName)/sizeof(azName[0]); i++){ 18376 if( sqlite3_stricmp(zName, azName[i])==0 ) return 1; 18377 } 18378 return 0; 18379 } 18380 18381 static int fts5Init(sqlite3 *db){ 18382 static const sqlite3_module fts5Mod = { 18383 /* iVersion */ 3, 18384 /* xCreate */ fts5CreateMethod, 18385 /* xConnect */ fts5ConnectMethod, 18386 /* xBestIndex */ fts5BestIndexMethod, 18387 /* xDisconnect */ fts5DisconnectMethod, 18388 /* xDestroy */ fts5DestroyMethod, 18389 /* xOpen */ fts5OpenMethod, 18390 /* xClose */ fts5CloseMethod, 18391 /* xFilter */ fts5FilterMethod, 18392 /* xNext */ fts5NextMethod, 18393 /* xEof */ fts5EofMethod, 18394 /* xColumn */ fts5ColumnMethod, 18395 /* xRowid */ fts5RowidMethod, 18396 /* xUpdate */ fts5UpdateMethod, 18397 /* xBegin */ fts5BeginMethod, 18398 /* xSync */ fts5SyncMethod, 18399 /* xCommit */ fts5CommitMethod, 18400 /* xRollback */ fts5RollbackMethod, 18401 /* xFindFunction */ fts5FindFunctionMethod, 18402 /* xRename */ fts5RenameMethod, 18403 /* xSavepoint */ fts5SavepointMethod, 18404 /* xRelease */ fts5ReleaseMethod, 18405 /* xRollbackTo */ fts5RollbackToMethod, 18406 /* xShadowName */ fts5ShadowName 18407 }; 18408 18409 int rc; 18410 Fts5Global *pGlobal = 0; 18411 18412 pGlobal = (Fts5Global*)sqlite3_malloc(sizeof(Fts5Global)); 18413 if( pGlobal==0 ){ 18414 rc = SQLITE_NOMEM; 18415 }else{ 18416 void *p = (void*)pGlobal; 18417 memset(pGlobal, 0, sizeof(Fts5Global)); 18418 pGlobal->db = db; 18419 pGlobal->api.iVersion = 2; 18420 pGlobal->api.xCreateFunction = fts5CreateAux; 18421 pGlobal->api.xCreateTokenizer = fts5CreateTokenizer; 18422 pGlobal->api.xFindTokenizer = fts5FindTokenizer; 18423 rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); 18424 if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); 18425 if( 
rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); 18426 if( rc==SQLITE_OK ) rc = sqlite3Fts5AuxInit(&pGlobal->api); 18427 if( rc==SQLITE_OK ) rc = sqlite3Fts5TokenizerInit(&pGlobal->api); 18428 if( rc==SQLITE_OK ) rc = sqlite3Fts5VocabInit(pGlobal, db); 18429 if( rc==SQLITE_OK ){ 18430 rc = sqlite3_create_function( 18431 db, "fts5", 1, SQLITE_UTF8, p, fts5Fts5Func, 0, 0 18432 ); 18433 } 18434 if( rc==SQLITE_OK ){ 18435 rc = sqlite3_create_function( 18436 db, "fts5_source_id", 0, SQLITE_UTF8, p, fts5SourceIdFunc, 0, 0 18437 ); 18438 } 18439 } 18440 18441 /* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file 18442 ** fts5_test_mi.c is compiled and linked into the executable. And call 18443 ** its entry point to enable the matchinfo() demo. */ 18444 #ifdef SQLITE_FTS5_ENABLE_TEST_MI 18445 if( rc==SQLITE_OK ){ 18446 extern int sqlite3Fts5TestRegisterMatchinfo(sqlite3*); 18447 rc = sqlite3Fts5TestRegisterMatchinfo(db); 18448 } 18449 #endif 18450 18451 return rc; 18452 } 18453 18454 /* 18455 ** The following functions are used to register the module with SQLite. If 18456 ** this module is being built as part of the SQLite core (SQLITE_CORE is 18457 ** defined), then sqlite3_open() will call sqlite3Fts5Init() directly. 18458 ** 18459 ** Or, if this module is being built as a loadable extension, 18460 ** sqlite3Fts5Init() is omitted and the two standard entry points 18461 ** sqlite3_fts_init() and sqlite3_fts5_init() defined instead. 
18462 */ 18463 #ifndef SQLITE_CORE 18464 #ifdef _WIN32 18465 __declspec(dllexport) 18466 #endif 18467 int sqlite3_fts_init( 18468 sqlite3 *db, 18469 char **pzErrMsg, 18470 const sqlite3_api_routines *pApi 18471 ){ 18472 SQLITE_EXTENSION_INIT2(pApi); 18473 (void)pzErrMsg; /* Unused parameter */ 18474 return fts5Init(db); 18475 } 18476 18477 #ifdef _WIN32 18478 __declspec(dllexport) 18479 #endif 18480 int sqlite3_fts5_init( 18481 sqlite3 *db, 18482 char **pzErrMsg, 18483 const sqlite3_api_routines *pApi 18484 ){ 18485 SQLITE_EXTENSION_INIT2(pApi); 18486 (void)pzErrMsg; /* Unused parameter */ 18487 return fts5Init(db); 18488 } 18489 #else 18490 int sqlite3Fts5Init(sqlite3 *db){ 18491 return fts5Init(db); 18492 } 18493 #endif 18494 18495 #line 1 "fts5_storage.c" 18496 /* 18497 ** 2014 May 31 18498 ** 18499 ** The author disclaims copyright to this source code. In place of 18500 ** a legal notice, here is a blessing: 18501 ** 18502 ** May you do good and not evil. 18503 ** May you find forgiveness for yourself and forgive others. 18504 ** May you share freely, never taking more than you give. 
18505 ** 18506 ****************************************************************************** 18507 ** 18508 */ 18509 18510 18511 18512 /* #include "fts5Int.h" */ 18513 18514 struct Fts5Storage { 18515 Fts5Config *pConfig; 18516 Fts5Index *pIndex; 18517 int bTotalsValid; /* True if nTotalRow/aTotalSize[] are valid */ 18518 i64 nTotalRow; /* Total number of rows in FTS table */ 18519 i64 *aTotalSize; /* Total sizes of each column */ 18520 sqlite3_stmt *aStmt[11]; 18521 }; 18522 18523 18524 #if FTS5_STMT_SCAN_ASC!=0 18525 # error "FTS5_STMT_SCAN_ASC mismatch" 18526 #endif 18527 #if FTS5_STMT_SCAN_DESC!=1 18528 # error "FTS5_STMT_SCAN_DESC mismatch" 18529 #endif 18530 #if FTS5_STMT_LOOKUP!=2 18531 # error "FTS5_STMT_LOOKUP mismatch" 18532 #endif 18533 18534 #define FTS5_STMT_INSERT_CONTENT 3 18535 #define FTS5_STMT_REPLACE_CONTENT 4 18536 #define FTS5_STMT_DELETE_CONTENT 5 18537 #define FTS5_STMT_REPLACE_DOCSIZE 6 18538 #define FTS5_STMT_DELETE_DOCSIZE 7 18539 #define FTS5_STMT_LOOKUP_DOCSIZE 8 18540 #define FTS5_STMT_REPLACE_CONFIG 9 18541 #define FTS5_STMT_SCAN 10 18542 18543 /* 18544 ** Prepare the two insert statements - Fts5Storage.pInsertContent and 18545 ** Fts5Storage.pInsertDocsize - if they have not already been prepared. 18546 ** Return SQLITE_OK if successful, or an SQLite error code if an error 18547 ** occurs. 18548 */ 18549 static int fts5StorageGetStmt( 18550 Fts5Storage *p, /* Storage handle */ 18551 int eStmt, /* FTS5_STMT_XXX constant */ 18552 sqlite3_stmt **ppStmt, /* OUT: Prepared statement handle */ 18553 char **pzErrMsg /* OUT: Error message (if any) */ 18554 ){ 18555 int rc = SQLITE_OK; 18556 18557 /* If there is no %_docsize table, there should be no requests for 18558 ** statements to operate on it. 
*/ 18559 assert( p->pConfig->bColumnsize || ( 18560 eStmt!=FTS5_STMT_REPLACE_DOCSIZE 18561 && eStmt!=FTS5_STMT_DELETE_DOCSIZE 18562 && eStmt!=FTS5_STMT_LOOKUP_DOCSIZE 18563 )); 18564 18565 assert( eStmt>=0 && eStmt<ArraySize(p->aStmt) ); 18566 if( p->aStmt[eStmt]==0 ){ 18567 const char *azStmt[] = { 18568 "SELECT %s FROM %s T WHERE T.%Q >= ? AND T.%Q <= ? ORDER BY T.%Q ASC", 18569 "SELECT %s FROM %s T WHERE T.%Q <= ? AND T.%Q >= ? ORDER BY T.%Q DESC", 18570 "SELECT %s FROM %s T WHERE T.%Q=?", /* LOOKUP */ 18571 18572 "INSERT INTO %Q.'%q_content' VALUES(%s)", /* INSERT_CONTENT */ 18573 "REPLACE INTO %Q.'%q_content' VALUES(%s)", /* REPLACE_CONTENT */ 18574 "DELETE FROM %Q.'%q_content' WHERE id=?", /* DELETE_CONTENT */ 18575 "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", /* REPLACE_DOCSIZE */ 18576 "DELETE FROM %Q.'%q_docsize' WHERE id=?", /* DELETE_DOCSIZE */ 18577 18578 "SELECT sz FROM %Q.'%q_docsize' WHERE id=?", /* LOOKUP_DOCSIZE */ 18579 18580 "REPLACE INTO %Q.'%q_config' VALUES(?,?)", /* REPLACE_CONFIG */ 18581 "SELECT %s FROM %s AS T", /* SCAN */ 18582 }; 18583 Fts5Config *pC = p->pConfig; 18584 char *zSql = 0; 18585 18586 switch( eStmt ){ 18587 case FTS5_STMT_SCAN: 18588 zSql = sqlite3_mprintf(azStmt[eStmt], 18589 pC->zContentExprlist, pC->zContent 18590 ); 18591 break; 18592 18593 case FTS5_STMT_SCAN_ASC: 18594 case FTS5_STMT_SCAN_DESC: 18595 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zContentExprlist, 18596 pC->zContent, pC->zContentRowid, pC->zContentRowid, 18597 pC->zContentRowid 18598 ); 18599 break; 18600 18601 case FTS5_STMT_LOOKUP: 18602 zSql = sqlite3_mprintf(azStmt[eStmt], 18603 pC->zContentExprlist, pC->zContent, pC->zContentRowid 18604 ); 18605 break; 18606 18607 case FTS5_STMT_INSERT_CONTENT: 18608 case FTS5_STMT_REPLACE_CONTENT: { 18609 int nCol = pC->nCol + 1; 18610 char *zBind; 18611 int i; 18612 18613 zBind = sqlite3_malloc64(1 + nCol*2); 18614 if( zBind ){ 18615 for(i=0; i<nCol; i++){ 18616 zBind[i*2] = '?'; 18617 zBind[i*2 + 1] = ','; 18618 } 
18619 zBind[i*2-1] = '\0'; 18620 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind); 18621 sqlite3_free(zBind); 18622 } 18623 break; 18624 } 18625 18626 default: 18627 zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName); 18628 break; 18629 } 18630 18631 if( zSql==0 ){ 18632 rc = SQLITE_NOMEM; 18633 }else{ 18634 int f = SQLITE_PREPARE_PERSISTENT; 18635 if( eStmt>FTS5_STMT_LOOKUP ) f |= SQLITE_PREPARE_NO_VTAB; 18636 p->pConfig->bLock++; 18637 rc = sqlite3_prepare_v3(pC->db, zSql, -1, f, &p->aStmt[eStmt], 0); 18638 p->pConfig->bLock--; 18639 sqlite3_free(zSql); 18640 if( rc!=SQLITE_OK && pzErrMsg ){ 18641 *pzErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pC->db)); 18642 } 18643 } 18644 } 18645 18646 *ppStmt = p->aStmt[eStmt]; 18647 sqlite3_reset(*ppStmt); 18648 return rc; 18649 } 18650 18651 18652 static int fts5ExecPrintf( 18653 sqlite3 *db, 18654 char **pzErr, 18655 const char *zFormat, 18656 ... 18657 ){ 18658 int rc; 18659 va_list ap; /* ... printf arguments */ 18660 char *zSql; 18661 18662 va_start(ap, zFormat); 18663 zSql = sqlite3_vmprintf(zFormat, ap); 18664 18665 if( zSql==0 ){ 18666 rc = SQLITE_NOMEM; 18667 }else{ 18668 rc = sqlite3_exec(db, zSql, 0, 0, pzErr); 18669 sqlite3_free(zSql); 18670 } 18671 18672 va_end(ap); 18673 return rc; 18674 } 18675 18676 /* 18677 ** Drop all shadow tables. Return SQLITE_OK if successful or an SQLite error 18678 ** code otherwise. 
18679 */ 18680 static int sqlite3Fts5DropAll(Fts5Config *pConfig){ 18681 int rc = fts5ExecPrintf(pConfig->db, 0, 18682 "DROP TABLE IF EXISTS %Q.'%q_data';" 18683 "DROP TABLE IF EXISTS %Q.'%q_idx';" 18684 "DROP TABLE IF EXISTS %Q.'%q_config';", 18685 pConfig->zDb, pConfig->zName, 18686 pConfig->zDb, pConfig->zName, 18687 pConfig->zDb, pConfig->zName 18688 ); 18689 if( rc==SQLITE_OK && pConfig->bColumnsize ){ 18690 rc = fts5ExecPrintf(pConfig->db, 0, 18691 "DROP TABLE IF EXISTS %Q.'%q_docsize';", 18692 pConfig->zDb, pConfig->zName 18693 ); 18694 } 18695 if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ 18696 rc = fts5ExecPrintf(pConfig->db, 0, 18697 "DROP TABLE IF EXISTS %Q.'%q_content';", 18698 pConfig->zDb, pConfig->zName 18699 ); 18700 } 18701 return rc; 18702 } 18703 18704 static void fts5StorageRenameOne( 18705 Fts5Config *pConfig, /* Current FTS5 configuration */ 18706 int *pRc, /* IN/OUT: Error code */ 18707 const char *zTail, /* Tail of table name e.g. "data", "config" */ 18708 const char *zName /* New name of FTS5 table */ 18709 ){ 18710 if( *pRc==SQLITE_OK ){ 18711 *pRc = fts5ExecPrintf(pConfig->db, 0, 18712 "ALTER TABLE %Q.'%q_%s' RENAME TO '%q_%s';", 18713 pConfig->zDb, pConfig->zName, zTail, zName, zTail 18714 ); 18715 } 18716 } 18717 18718 static int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ 18719 Fts5Config *pConfig = pStorage->pConfig; 18720 int rc = sqlite3Fts5StorageSync(pStorage); 18721 18722 fts5StorageRenameOne(pConfig, &rc, "data", zName); 18723 fts5StorageRenameOne(pConfig, &rc, "idx", zName); 18724 fts5StorageRenameOne(pConfig, &rc, "config", zName); 18725 if( pConfig->bColumnsize ){ 18726 fts5StorageRenameOne(pConfig, &rc, "docsize", zName); 18727 } 18728 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ 18729 fts5StorageRenameOne(pConfig, &rc, "content", zName); 18730 } 18731 return rc; 18732 } 18733 18734 /* 18735 ** Create the shadow table named zPost, with definition zDefn. 
Return 18736 ** SQLITE_OK if successful, or an SQLite error code otherwise. 18737 */ 18738 static int sqlite3Fts5CreateTable( 18739 Fts5Config *pConfig, /* FTS5 configuration */ 18740 const char *zPost, /* Shadow table to create (e.g. "content") */ 18741 const char *zDefn, /* Columns etc. for shadow table */ 18742 int bWithout, /* True for without rowid */ 18743 char **pzErr /* OUT: Error message */ 18744 ){ 18745 int rc; 18746 char *zErr = 0; 18747 18748 rc = fts5ExecPrintf(pConfig->db, &zErr, "CREATE TABLE %Q.'%q_%q'(%s)%s", 18749 pConfig->zDb, pConfig->zName, zPost, zDefn, 18750 #ifndef SQLITE_FTS5_NO_WITHOUT_ROWID 18751 bWithout?" WITHOUT ROWID": 18752 #endif 18753 "" 18754 ); 18755 if( zErr ){ 18756 *pzErr = sqlite3_mprintf( 18757 "fts5: error creating shadow table %q_%s: %s", 18758 pConfig->zName, zPost, zErr 18759 ); 18760 sqlite3_free(zErr); 18761 } 18762 18763 return rc; 18764 } 18765 18766 /* 18767 ** Open a new Fts5Index handle. If the bCreate argument is true, create 18768 ** and initialize the underlying tables 18769 ** 18770 ** If successful, set *pp to point to the new object and return SQLITE_OK. 18771 ** Otherwise, set *pp to NULL and return an SQLite error code. 
18772 */ 18773 static int sqlite3Fts5StorageOpen( 18774 Fts5Config *pConfig, 18775 Fts5Index *pIndex, 18776 int bCreate, 18777 Fts5Storage **pp, 18778 char **pzErr /* OUT: Error message */ 18779 ){ 18780 int rc = SQLITE_OK; 18781 Fts5Storage *p; /* New object */ 18782 sqlite3_int64 nByte; /* Bytes of space to allocate */ 18783 18784 nByte = sizeof(Fts5Storage) /* Fts5Storage object */ 18785 + pConfig->nCol * sizeof(i64); /* Fts5Storage.aTotalSize[] */ 18786 *pp = p = (Fts5Storage*)sqlite3_malloc64(nByte); 18787 if( !p ) return SQLITE_NOMEM; 18788 18789 memset(p, 0, (size_t)nByte); 18790 p->aTotalSize = (i64*)&p[1]; 18791 p->pConfig = pConfig; 18792 p->pIndex = pIndex; 18793 18794 if( bCreate ){ 18795 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ 18796 int nDefn = 32 + pConfig->nCol*10; 18797 char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 10); 18798 if( zDefn==0 ){ 18799 rc = SQLITE_NOMEM; 18800 }else{ 18801 int i; 18802 int iOff; 18803 sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); 18804 iOff = (int)strlen(zDefn); 18805 for(i=0; i<pConfig->nCol; i++){ 18806 sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); 18807 iOff += (int)strlen(&zDefn[iOff]); 18808 } 18809 rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); 18810 } 18811 sqlite3_free(zDefn); 18812 } 18813 18814 if( rc==SQLITE_OK && pConfig->bColumnsize ){ 18815 rc = sqlite3Fts5CreateTable( 18816 pConfig, "docsize", "id INTEGER PRIMARY KEY, sz BLOB", 0, pzErr 18817 ); 18818 } 18819 if( rc==SQLITE_OK ){ 18820 rc = sqlite3Fts5CreateTable( 18821 pConfig, "config", "k PRIMARY KEY, v", 1, pzErr 18822 ); 18823 } 18824 if( rc==SQLITE_OK ){ 18825 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); 18826 } 18827 } 18828 18829 if( rc ){ 18830 sqlite3Fts5StorageClose(p); 18831 *pp = 0; 18832 } 18833 return rc; 18834 } 18835 18836 /* 18837 ** Close a handle opened by an earlier call to sqlite3Fts5StorageOpen(). 
18838 */ 18839 static int sqlite3Fts5StorageClose(Fts5Storage *p){ 18840 int rc = SQLITE_OK; 18841 if( p ){ 18842 int i; 18843 18844 /* Finalize all SQL statements */ 18845 for(i=0; i<ArraySize(p->aStmt); i++){ 18846 sqlite3_finalize(p->aStmt[i]); 18847 } 18848 18849 sqlite3_free(p); 18850 } 18851 return rc; 18852 } 18853 18854 typedef struct Fts5InsertCtx Fts5InsertCtx; 18855 struct Fts5InsertCtx { 18856 Fts5Storage *pStorage; 18857 int iCol; 18858 int szCol; /* Size of column value in tokens */ 18859 }; 18860 18861 /* 18862 ** Tokenization callback used when inserting tokens into the FTS index. 18863 */ 18864 static int fts5StorageInsertCallback( 18865 void *pContext, /* Pointer to Fts5InsertCtx object */ 18866 int tflags, 18867 const char *pToken, /* Buffer containing token */ 18868 int nToken, /* Size of token in bytes */ 18869 int iUnused1, /* Start offset of token */ 18870 int iUnused2 /* End offset of token */ 18871 ){ 18872 Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext; 18873 Fts5Index *pIdx = pCtx->pStorage->pIndex; 18874 UNUSED_PARAM2(iUnused1, iUnused2); 18875 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; 18876 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ 18877 pCtx->szCol++; 18878 } 18879 return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken); 18880 } 18881 18882 /* 18883 ** If a row with rowid iDel is present in the %_content table, add the 18884 ** delete-markers to the FTS index necessary to delete it. Do not actually 18885 ** remove the %_content row at this time though. 
18886 */ 18887 static int fts5StorageDeleteFromIndex( 18888 Fts5Storage *p, 18889 i64 iDel, 18890 sqlite3_value **apVal 18891 ){ 18892 Fts5Config *pConfig = p->pConfig; 18893 sqlite3_stmt *pSeek = 0; /* SELECT to read row iDel from %_data */ 18894 int rc; /* Return code */ 18895 int rc2; /* sqlite3_reset() return code */ 18896 int iCol; 18897 Fts5InsertCtx ctx; 18898 18899 if( apVal==0 ){ 18900 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP, &pSeek, 0); 18901 if( rc!=SQLITE_OK ) return rc; 18902 sqlite3_bind_int64(pSeek, 1, iDel); 18903 if( sqlite3_step(pSeek)!=SQLITE_ROW ){ 18904 return sqlite3_reset(pSeek); 18905 } 18906 } 18907 18908 ctx.pStorage = p; 18909 ctx.iCol = -1; 18910 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 1, iDel); 18911 for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){ 18912 if( pConfig->abUnindexed[iCol-1]==0 ){ 18913 const char *zText; 18914 int nText; 18915 assert( pSeek==0 || apVal==0 ); 18916 assert( pSeek!=0 || apVal!=0 ); 18917 if( pSeek ){ 18918 zText = (const char*)sqlite3_column_text(pSeek, iCol); 18919 nText = sqlite3_column_bytes(pSeek, iCol); 18920 }else if( ALWAYS(apVal) ){ 18921 zText = (const char*)sqlite3_value_text(apVal[iCol-1]); 18922 nText = sqlite3_value_bytes(apVal[iCol-1]); 18923 }else{ 18924 continue; 18925 } 18926 ctx.szCol = 0; 18927 rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, 18928 zText, nText, (void*)&ctx, fts5StorageInsertCallback 18929 ); 18930 p->aTotalSize[iCol-1] -= (i64)ctx.szCol; 18931 if( p->aTotalSize[iCol-1]<0 ){ 18932 rc = FTS5_CORRUPT; 18933 } 18934 } 18935 } 18936 if( rc==SQLITE_OK && p->nTotalRow<1 ){ 18937 rc = FTS5_CORRUPT; 18938 }else{ 18939 p->nTotalRow--; 18940 } 18941 18942 rc2 = sqlite3_reset(pSeek); 18943 if( rc==SQLITE_OK ) rc = rc2; 18944 return rc; 18945 } 18946 18947 18948 /* 18949 ** Insert a record into the %_docsize table. 
Specifically, do: 18950 ** 18951 ** INSERT OR REPLACE INTO %_docsize(id, sz) VALUES(iRowid, pBuf); 18952 ** 18953 ** If there is no %_docsize table (as happens if the columnsize=0 option 18954 ** is specified when the FTS5 table is created), this function is a no-op. 18955 */ 18956 static int fts5StorageInsertDocsize( 18957 Fts5Storage *p, /* Storage module to write to */ 18958 i64 iRowid, /* id value */ 18959 Fts5Buffer *pBuf /* sz value */ 18960 ){ 18961 int rc = SQLITE_OK; 18962 if( p->pConfig->bColumnsize ){ 18963 sqlite3_stmt *pReplace = 0; 18964 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); 18965 if( rc==SQLITE_OK ){ 18966 sqlite3_bind_int64(pReplace, 1, iRowid); 18967 sqlite3_bind_blob(pReplace, 2, pBuf->p, pBuf->n, SQLITE_STATIC); 18968 sqlite3_step(pReplace); 18969 rc = sqlite3_reset(pReplace); 18970 sqlite3_bind_null(pReplace, 2); 18971 } 18972 } 18973 return rc; 18974 } 18975 18976 /* 18977 ** Load the contents of the "averages" record from disk into the 18978 ** p->nTotalRow and p->aTotalSize[] variables. If successful, and if 18979 ** argument bCache is true, set the p->bTotalsValid flag to indicate 18980 ** that the contents of aTotalSize[] and nTotalRow are valid until 18981 ** further notice. 18982 ** 18983 ** Return SQLITE_OK if successful, or an SQLite error code if an error 18984 ** occurs. 18985 */ 18986 static int fts5StorageLoadTotals(Fts5Storage *p, int bCache){ 18987 int rc = SQLITE_OK; 18988 if( p->bTotalsValid==0 ){ 18989 rc = sqlite3Fts5IndexGetAverages(p->pIndex, &p->nTotalRow, p->aTotalSize); 18990 p->bTotalsValid = bCache; 18991 } 18992 return rc; 18993 } 18994 18995 /* 18996 ** Store the current contents of the p->nTotalRow and p->aTotalSize[] 18997 ** variables in the "averages" record on disk. 18998 ** 18999 ** Return SQLITE_OK if successful, or an SQLite error code if an error 19000 ** occurs. 
19001 */ 19002 static int fts5StorageSaveTotals(Fts5Storage *p){ 19003 int nCol = p->pConfig->nCol; 19004 int i; 19005 Fts5Buffer buf; 19006 int rc = SQLITE_OK; 19007 memset(&buf, 0, sizeof(buf)); 19008 19009 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->nTotalRow); 19010 for(i=0; i<nCol; i++){ 19011 sqlite3Fts5BufferAppendVarint(&rc, &buf, p->aTotalSize[i]); 19012 } 19013 if( rc==SQLITE_OK ){ 19014 rc = sqlite3Fts5IndexSetAverages(p->pIndex, buf.p, buf.n); 19015 } 19016 sqlite3_free(buf.p); 19017 19018 return rc; 19019 } 19020 19021 /* 19022 ** Remove a row from the FTS table. 19023 */ 19024 static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64 iDel, sqlite3_value **apVal){ 19025 Fts5Config *pConfig = p->pConfig; 19026 int rc; 19027 sqlite3_stmt *pDel = 0; 19028 19029 assert( pConfig->eContent!=FTS5_CONTENT_NORMAL || apVal==0 ); 19030 rc = fts5StorageLoadTotals(p, 1); 19031 19032 /* Delete the index records */ 19033 if( rc==SQLITE_OK ){ 19034 rc = fts5StorageDeleteFromIndex(p, iDel, apVal); 19035 } 19036 19037 /* Delete the %_docsize record */ 19038 if( rc==SQLITE_OK && pConfig->bColumnsize ){ 19039 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_DOCSIZE, &pDel, 0); 19040 if( rc==SQLITE_OK ){ 19041 sqlite3_bind_int64(pDel, 1, iDel); 19042 sqlite3_step(pDel); 19043 rc = sqlite3_reset(pDel); 19044 } 19045 } 19046 19047 /* Delete the %_content record */ 19048 if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ 19049 if( rc==SQLITE_OK ){ 19050 rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0); 19051 } 19052 if( rc==SQLITE_OK ){ 19053 sqlite3_bind_int64(pDel, 1, iDel); 19054 sqlite3_step(pDel); 19055 rc = sqlite3_reset(pDel); 19056 } 19057 } 19058 19059 return rc; 19060 } 19061 19062 /* 19063 ** Delete all entries in the FTS5 index. 19064 */ 19065 static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ 19066 Fts5Config *pConfig = p->pConfig; 19067 int rc; 19068 19069 p->bTotalsValid = 0; 19070 19071 /* Delete the contents of the %_data and %_docsize tables. 
*/ 19072 rc = fts5ExecPrintf(pConfig->db, 0, 19073 "DELETE FROM %Q.'%q_data';" 19074 "DELETE FROM %Q.'%q_idx';", 19075 pConfig->zDb, pConfig->zName, 19076 pConfig->zDb, pConfig->zName 19077 ); 19078 if( rc==SQLITE_OK && pConfig->bColumnsize ){ 19079 rc = fts5ExecPrintf(pConfig->db, 0, 19080 "DELETE FROM %Q.'%q_docsize';", 19081 pConfig->zDb, pConfig->zName 19082 ); 19083 } 19084 19085 /* Reinitialize the %_data table. This call creates the initial structure 19086 ** and averages records. */ 19087 if( rc==SQLITE_OK ){ 19088 rc = sqlite3Fts5IndexReinit(p->pIndex); 19089 } 19090 if( rc==SQLITE_OK ){ 19091 rc = sqlite3Fts5StorageConfigValue(p, "version", 0, FTS5_CURRENT_VERSION); 19092 } 19093 return rc; 19094 } 19095 19096 static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ 19097 Fts5Buffer buf = {0,0,0}; 19098 Fts5Config *pConfig = p->pConfig; 19099 sqlite3_stmt *pScan = 0; 19100 Fts5InsertCtx ctx; 19101 int rc, rc2; 19102 19103 memset(&ctx, 0, sizeof(Fts5InsertCtx)); 19104 ctx.pStorage = p; 19105 rc = sqlite3Fts5StorageDeleteAll(p); 19106 if( rc==SQLITE_OK ){ 19107 rc = fts5StorageLoadTotals(p, 1); 19108 } 19109 19110 if( rc==SQLITE_OK ){ 19111 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); 19112 } 19113 19114 while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pScan) ){ 19115 i64 iRowid = sqlite3_column_int64(pScan, 0); 19116 19117 sqlite3Fts5BufferZero(&buf); 19118 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); 19119 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ 19120 ctx.szCol = 0; 19121 if( pConfig->abUnindexed[ctx.iCol]==0 ){ 19122 const char *zText = (const char*)sqlite3_column_text(pScan, ctx.iCol+1); 19123 int nText = sqlite3_column_bytes(pScan, ctx.iCol+1); 19124 rc = sqlite3Fts5Tokenize(pConfig, 19125 FTS5_TOKENIZE_DOCUMENT, 19126 zText, nText, 19127 (void*)&ctx, 19128 fts5StorageInsertCallback 19129 ); 19130 } 19131 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); 19132 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; 
19133 } 19134 p->nTotalRow++; 19135 19136 if( rc==SQLITE_OK ){ 19137 rc = fts5StorageInsertDocsize(p, iRowid, &buf); 19138 } 19139 } 19140 sqlite3_free(buf.p); 19141 rc2 = sqlite3_reset(pScan); 19142 if( rc==SQLITE_OK ) rc = rc2; 19143 19144 /* Write the averages record */ 19145 if( rc==SQLITE_OK ){ 19146 rc = fts5StorageSaveTotals(p); 19147 } 19148 return rc; 19149 } 19150 19151 static int sqlite3Fts5StorageOptimize(Fts5Storage *p){ 19152 return sqlite3Fts5IndexOptimize(p->pIndex); 19153 } 19154 19155 static int sqlite3Fts5StorageMerge(Fts5Storage *p, int nMerge){ 19156 return sqlite3Fts5IndexMerge(p->pIndex, nMerge); 19157 } 19158 19159 static int sqlite3Fts5StorageReset(Fts5Storage *p){ 19160 return sqlite3Fts5IndexReset(p->pIndex); 19161 } 19162 19163 /* 19164 ** Allocate a new rowid. This is used for "external content" tables when 19165 ** a NULL value is inserted into the rowid column. The new rowid is allocated 19166 ** by inserting a dummy row into the %_docsize table. The dummy will be 19167 ** overwritten later. 19168 ** 19169 ** If the %_docsize table does not exist, SQLITE_MISMATCH is returned. In 19170 ** this case the user is required to provide a rowid explicitly. 19171 */ 19172 static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ 19173 int rc = SQLITE_MISMATCH; 19174 if( p->pConfig->bColumnsize ){ 19175 sqlite3_stmt *pReplace = 0; 19176 rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_DOCSIZE, &pReplace, 0); 19177 if( rc==SQLITE_OK ){ 19178 sqlite3_bind_null(pReplace, 1); 19179 sqlite3_bind_null(pReplace, 2); 19180 sqlite3_step(pReplace); 19181 rc = sqlite3_reset(pReplace); 19182 } 19183 if( rc==SQLITE_OK ){ 19184 *piRowid = sqlite3_last_insert_rowid(p->pConfig->db); 19185 } 19186 } 19187 return rc; 19188 } 19189 19190 /* 19191 ** Insert a new row into the FTS content table. 
19192 */ 19193 static int sqlite3Fts5StorageContentInsert( 19194 Fts5Storage *p, 19195 sqlite3_value **apVal, 19196 i64 *piRowid 19197 ){ 19198 Fts5Config *pConfig = p->pConfig; 19199 int rc = SQLITE_OK; 19200 19201 /* Insert the new row into the %_content table. */ 19202 if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ 19203 if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ 19204 *piRowid = sqlite3_value_int64(apVal[1]); 19205 }else{ 19206 rc = fts5StorageNewRowid(p, piRowid); 19207 } 19208 }else{ 19209 sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ 19210 int i; /* Counter variable */ 19211 rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0); 19212 for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ 19213 rc = sqlite3_bind_value(pInsert, i, apVal[i]); 19214 } 19215 if( rc==SQLITE_OK ){ 19216 sqlite3_step(pInsert); 19217 rc = sqlite3_reset(pInsert); 19218 } 19219 *piRowid = sqlite3_last_insert_rowid(pConfig->db); 19220 } 19221 19222 return rc; 19223 } 19224 19225 /* 19226 ** Insert new entries into the FTS index and %_docsize table. 
19227 */ 19228 static int sqlite3Fts5StorageIndexInsert( 19229 Fts5Storage *p, 19230 sqlite3_value **apVal, 19231 i64 iRowid 19232 ){ 19233 Fts5Config *pConfig = p->pConfig; 19234 int rc = SQLITE_OK; /* Return code */ 19235 Fts5InsertCtx ctx; /* Tokenization callback context object */ 19236 Fts5Buffer buf; /* Buffer used to build up %_docsize blob */ 19237 19238 memset(&buf, 0, sizeof(Fts5Buffer)); 19239 ctx.pStorage = p; 19240 rc = fts5StorageLoadTotals(p, 1); 19241 19242 if( rc==SQLITE_OK ){ 19243 rc = sqlite3Fts5IndexBeginWrite(p->pIndex, 0, iRowid); 19244 } 19245 for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){ 19246 ctx.szCol = 0; 19247 if( pConfig->abUnindexed[ctx.iCol]==0 ){ 19248 const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]); 19249 int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]); 19250 rc = sqlite3Fts5Tokenize(pConfig, 19251 FTS5_TOKENIZE_DOCUMENT, 19252 zText, nText, 19253 (void*)&ctx, 19254 fts5StorageInsertCallback 19255 ); 19256 } 19257 sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); 19258 p->aTotalSize[ctx.iCol] += (i64)ctx.szCol; 19259 } 19260 p->nTotalRow++; 19261 19262 /* Write the %_docsize record */ 19263 if( rc==SQLITE_OK ){ 19264 rc = fts5StorageInsertDocsize(p, iRowid, &buf); 19265 } 19266 sqlite3_free(buf.p); 19267 19268 return rc; 19269 } 19270 19271 static int fts5StorageCount(Fts5Storage *p, const char *zSuffix, i64 *pnRow){ 19272 Fts5Config *pConfig = p->pConfig; 19273 char *zSql; 19274 int rc; 19275 19276 zSql = sqlite3_mprintf("SELECT count(*) FROM %Q.'%q_%s'", 19277 pConfig->zDb, pConfig->zName, zSuffix 19278 ); 19279 if( zSql==0 ){ 19280 rc = SQLITE_NOMEM; 19281 }else{ 19282 sqlite3_stmt *pCnt = 0; 19283 rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &pCnt, 0); 19284 if( rc==SQLITE_OK ){ 19285 if( SQLITE_ROW==sqlite3_step(pCnt) ){ 19286 *pnRow = sqlite3_column_int64(pCnt, 0); 19287 } 19288 rc = sqlite3_finalize(pCnt); 19289 } 19290 } 19291 19292 sqlite3_free(zSql); 19293 return rc; 
19294 } 19295 19296 /* 19297 ** Context object used by sqlite3Fts5StorageIntegrity(). 19298 */ 19299 typedef struct Fts5IntegrityCtx Fts5IntegrityCtx; 19300 struct Fts5IntegrityCtx { 19301 i64 iRowid; 19302 int iCol; 19303 int szCol; 19304 u64 cksum; 19305 Fts5Termset *pTermset; 19306 Fts5Config *pConfig; 19307 }; 19308 19309 19310 /* 19311 ** Tokenization callback used by integrity check. 19312 */ 19313 static int fts5StorageIntegrityCallback( 19314 void *pContext, /* Pointer to Fts5IntegrityCtx object */ 19315 int tflags, 19316 const char *pToken, /* Buffer containing token */ 19317 int nToken, /* Size of token in bytes */ 19318 int iUnused1, /* Start offset of token */ 19319 int iUnused2 /* End offset of token */ 19320 ){ 19321 Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext; 19322 Fts5Termset *pTermset = pCtx->pTermset; 19323 int bPresent; 19324 int ii; 19325 int rc = SQLITE_OK; 19326 int iPos; 19327 int iCol; 19328 19329 UNUSED_PARAM2(iUnused1, iUnused2); 19330 if( nToken>FTS5_MAX_TOKEN_SIZE ) nToken = FTS5_MAX_TOKEN_SIZE; 19331 19332 if( (tflags & FTS5_TOKEN_COLOCATED)==0 || pCtx->szCol==0 ){ 19333 pCtx->szCol++; 19334 } 19335 19336 switch( pCtx->pConfig->eDetail ){ 19337 case FTS5_DETAIL_FULL: 19338 iPos = pCtx->szCol-1; 19339 iCol = pCtx->iCol; 19340 break; 19341 19342 case FTS5_DETAIL_COLUMNS: 19343 iPos = pCtx->iCol; 19344 iCol = 0; 19345 break; 19346 19347 default: 19348 assert( pCtx->pConfig->eDetail==FTS5_DETAIL_NONE ); 19349 iPos = 0; 19350 iCol = 0; 19351 break; 19352 } 19353 19354 rc = sqlite3Fts5TermsetAdd(pTermset, 0, pToken, nToken, &bPresent); 19355 if( rc==SQLITE_OK && bPresent==0 ){ 19356 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( 19357 pCtx->iRowid, iCol, iPos, 0, pToken, nToken 19358 ); 19359 } 19360 19361 for(ii=0; rc==SQLITE_OK && ii<pCtx->pConfig->nPrefix; ii++){ 19362 const int nChar = pCtx->pConfig->aPrefix[ii]; 19363 int nByte = sqlite3Fts5IndexCharlenToBytelen(pToken, nToken, nChar); 19364 if( nByte ){ 19365 rc = 
sqlite3Fts5TermsetAdd(pTermset, ii+1, pToken, nByte, &bPresent); 19366 if( bPresent==0 ){ 19367 pCtx->cksum ^= sqlite3Fts5IndexEntryCksum( 19368 pCtx->iRowid, iCol, iPos, ii+1, pToken, nByte 19369 ); 19370 } 19371 } 19372 } 19373 19374 return rc; 19375 } 19376 19377 /* 19378 ** Check that the contents of the FTS index match that of the %_content 19379 ** table. Return SQLITE_OK if they do, or SQLITE_CORRUPT if not. Return 19380 ** some other SQLite error code if an error occurs while attempting to 19381 ** determine this. 19382 */ 19383 static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){ 19384 Fts5Config *pConfig = p->pConfig; 19385 int rc = SQLITE_OK; /* Return code */ 19386 int *aColSize; /* Array of size pConfig->nCol */ 19387 i64 *aTotalSize; /* Array of size pConfig->nCol */ 19388 Fts5IntegrityCtx ctx; 19389 sqlite3_stmt *pScan; 19390 int bUseCksum; 19391 19392 memset(&ctx, 0, sizeof(Fts5IntegrityCtx)); 19393 ctx.pConfig = p->pConfig; 19394 aTotalSize = (i64*)sqlite3_malloc64(pConfig->nCol*(sizeof(int)+sizeof(i64))); 19395 if( !aTotalSize ) return SQLITE_NOMEM; 19396 aColSize = (int*)&aTotalSize[pConfig->nCol]; 19397 memset(aTotalSize, 0, sizeof(i64) * pConfig->nCol); 19398 19399 bUseCksum = (pConfig->eContent==FTS5_CONTENT_NORMAL 19400 || (pConfig->eContent==FTS5_CONTENT_EXTERNAL && iArg) 19401 ); 19402 if( bUseCksum ){ 19403 /* Generate the expected index checksum based on the contents of the 19404 ** %_content table. This block stores the checksum in ctx.cksum. 
*/ 19405 rc = fts5StorageGetStmt(p, FTS5_STMT_SCAN, &pScan, 0); 19406 if( rc==SQLITE_OK ){ 19407 int rc2; 19408 while( SQLITE_ROW==sqlite3_step(pScan) ){ 19409 int i; 19410 ctx.iRowid = sqlite3_column_int64(pScan, 0); 19411 ctx.szCol = 0; 19412 if( pConfig->bColumnsize ){ 19413 rc = sqlite3Fts5StorageDocsize(p, ctx.iRowid, aColSize); 19414 } 19415 if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_NONE ){ 19416 rc = sqlite3Fts5TermsetNew(&ctx.pTermset); 19417 } 19418 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ 19419 if( pConfig->abUnindexed[i] ) continue; 19420 ctx.iCol = i; 19421 ctx.szCol = 0; 19422 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ 19423 rc = sqlite3Fts5TermsetNew(&ctx.pTermset); 19424 } 19425 if( rc==SQLITE_OK ){ 19426 const char *zText = (const char*)sqlite3_column_text(pScan, i+1); 19427 int nText = sqlite3_column_bytes(pScan, i+1); 19428 rc = sqlite3Fts5Tokenize(pConfig, 19429 FTS5_TOKENIZE_DOCUMENT, 19430 zText, nText, 19431 (void*)&ctx, 19432 fts5StorageIntegrityCallback 19433 ); 19434 } 19435 if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ 19436 rc = FTS5_CORRUPT; 19437 } 19438 aTotalSize[i] += ctx.szCol; 19439 if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ 19440 sqlite3Fts5TermsetFree(ctx.pTermset); 19441 ctx.pTermset = 0; 19442 } 19443 } 19444 sqlite3Fts5TermsetFree(ctx.pTermset); 19445 ctx.pTermset = 0; 19446 19447 if( rc!=SQLITE_OK ) break; 19448 } 19449 rc2 = sqlite3_reset(pScan); 19450 if( rc==SQLITE_OK ) rc = rc2; 19451 } 19452 19453 /* Test that the "totals" (sometimes called "averages") record looks Ok */ 19454 if( rc==SQLITE_OK ){ 19455 int i; 19456 rc = fts5StorageLoadTotals(p, 0); 19457 for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){ 19458 if( p->aTotalSize[i]!=aTotalSize[i] ) rc = FTS5_CORRUPT; 19459 } 19460 } 19461 19462 /* Check that the %_docsize and %_content tables contain the expected 19463 ** number of rows. 
*/ 19464 if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ 19465 i64 nRow = 0; 19466 rc = fts5StorageCount(p, "content", &nRow); 19467 if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; 19468 } 19469 if( rc==SQLITE_OK && pConfig->bColumnsize ){ 19470 i64 nRow = 0; 19471 rc = fts5StorageCount(p, "docsize", &nRow); 19472 if( rc==SQLITE_OK && nRow!=p->nTotalRow ) rc = FTS5_CORRUPT; 19473 } 19474 } 19475 19476 /* Pass the expected checksum down to the FTS index module. It will 19477 ** verify, amongst other things, that it matches the checksum generated by 19478 ** inspecting the index itself. */ 19479 if( rc==SQLITE_OK ){ 19480 rc = sqlite3Fts5IndexIntegrityCheck(p->pIndex, ctx.cksum, bUseCksum); 19481 } 19482 19483 sqlite3_free(aTotalSize); 19484 return rc; 19485 } 19486 19487 /* 19488 ** Obtain an SQLite statement handle that may be used to read data from the 19489 ** %_content table. 19490 */ 19491 static int sqlite3Fts5StorageStmt( 19492 Fts5Storage *p, 19493 int eStmt, 19494 sqlite3_stmt **pp, 19495 char **pzErrMsg 19496 ){ 19497 int rc; 19498 assert( eStmt==FTS5_STMT_SCAN_ASC 19499 || eStmt==FTS5_STMT_SCAN_DESC 19500 || eStmt==FTS5_STMT_LOOKUP 19501 ); 19502 rc = fts5StorageGetStmt(p, eStmt, pp, pzErrMsg); 19503 if( rc==SQLITE_OK ){ 19504 assert( p->aStmt[eStmt]==*pp ); 19505 p->aStmt[eStmt] = 0; 19506 } 19507 return rc; 19508 } 19509 19510 /* 19511 ** Release an SQLite statement handle obtained via an earlier call to 19512 ** sqlite3Fts5StorageStmt(). The eStmt parameter passed to this function 19513 ** must match that passed to the sqlite3Fts5StorageStmt() call. 
19514 */ 19515 static void sqlite3Fts5StorageStmtRelease( 19516 Fts5Storage *p, 19517 int eStmt, 19518 sqlite3_stmt *pStmt 19519 ){ 19520 assert( eStmt==FTS5_STMT_SCAN_ASC 19521 || eStmt==FTS5_STMT_SCAN_DESC 19522 || eStmt==FTS5_STMT_LOOKUP 19523 ); 19524 if( p->aStmt[eStmt]==0 ){ 19525 sqlite3_reset(pStmt); 19526 p->aStmt[eStmt] = pStmt; 19527 }else{ 19528 sqlite3_finalize(pStmt); 19529 } 19530 } 19531 19532 static int fts5StorageDecodeSizeArray( 19533 int *aCol, int nCol, /* Array to populate */ 19534 const u8 *aBlob, int nBlob /* Record to read varints from */ 19535 ){ 19536 int i; 19537 int iOff = 0; 19538 for(i=0; i<nCol; i++){ 19539 if( iOff>=nBlob ) return 1; 19540 iOff += fts5GetVarint32(&aBlob[iOff], aCol[i]); 19541 } 19542 return (iOff!=nBlob); 19543 } 19544 19545 /* 19546 ** Argument aCol points to an array of integers containing one entry for 19547 ** each table column. This function reads the %_docsize record for the 19548 ** specified rowid and populates aCol[] with the results. 19549 ** 19550 ** An SQLite error code is returned if an error occurs, or SQLITE_OK 19551 ** otherwise. 
19552 */ 19553 static int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ 19554 int nCol = p->pConfig->nCol; /* Number of user columns in table */ 19555 sqlite3_stmt *pLookup = 0; /* Statement to query %_docsize */ 19556 int rc; /* Return Code */ 19557 19558 assert( p->pConfig->bColumnsize ); 19559 rc = fts5StorageGetStmt(p, FTS5_STMT_LOOKUP_DOCSIZE, &pLookup, 0); 19560 if( pLookup ){ 19561 int bCorrupt = 1; 19562 assert( rc==SQLITE_OK ); 19563 sqlite3_bind_int64(pLookup, 1, iRowid); 19564 if( SQLITE_ROW==sqlite3_step(pLookup) ){ 19565 const u8 *aBlob = sqlite3_column_blob(pLookup, 0); 19566 int nBlob = sqlite3_column_bytes(pLookup, 0); 19567 if( 0==fts5StorageDecodeSizeArray(aCol, nCol, aBlob, nBlob) ){ 19568 bCorrupt = 0; 19569 } 19570 } 19571 rc = sqlite3_reset(pLookup); 19572 if( bCorrupt && rc==SQLITE_OK ){ 19573 rc = FTS5_CORRUPT; 19574 } 19575 }else{ 19576 assert( rc!=SQLITE_OK ); 19577 } 19578 19579 return rc; 19580 } 19581 19582 static int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ 19583 int rc = fts5StorageLoadTotals(p, 0); 19584 if( rc==SQLITE_OK ){ 19585 *pnToken = 0; 19586 if( iCol<0 ){ 19587 int i; 19588 for(i=0; i<p->pConfig->nCol; i++){ 19589 *pnToken += p->aTotalSize[i]; 19590 } 19591 }else if( iCol<p->pConfig->nCol ){ 19592 *pnToken = p->aTotalSize[iCol]; 19593 }else{ 19594 rc = SQLITE_RANGE; 19595 } 19596 } 19597 return rc; 19598 } 19599 19600 static int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ 19601 int rc = fts5StorageLoadTotals(p, 0); 19602 if( rc==SQLITE_OK ){ 19603 /* nTotalRow being zero does not necessarily indicate a corrupt 19604 ** database - it might be that the FTS5 table really does contain zero 19605 ** rows. However this function is only called from the xRowCount() API, 19606 ** and there is no way for that API to be invoked if the table contains 19607 ** no rows. Hence the FTS5_CORRUPT return. 
*/ 19608 *pnRow = p->nTotalRow; 19609 if( p->nTotalRow<=0 ) rc = FTS5_CORRUPT; 19610 } 19611 return rc; 19612 } 19613 19614 /* 19615 ** Flush any data currently held in-memory to disk. 19616 */ 19617 static int sqlite3Fts5StorageSync(Fts5Storage *p){ 19618 int rc = SQLITE_OK; 19619 i64 iLastRowid = sqlite3_last_insert_rowid(p->pConfig->db); 19620 if( p->bTotalsValid ){ 19621 rc = fts5StorageSaveTotals(p); 19622 p->bTotalsValid = 0; 19623 } 19624 if( rc==SQLITE_OK ){ 19625 rc = sqlite3Fts5IndexSync(p->pIndex); 19626 } 19627 sqlite3_set_last_insert_rowid(p->pConfig->db, iLastRowid); 19628 return rc; 19629 } 19630 19631 static int sqlite3Fts5StorageRollback(Fts5Storage *p){ 19632 p->bTotalsValid = 0; 19633 return sqlite3Fts5IndexRollback(p->pIndex); 19634 } 19635 19636 static int sqlite3Fts5StorageConfigValue( 19637 Fts5Storage *p, 19638 const char *z, 19639 sqlite3_value *pVal, 19640 int iVal 19641 ){ 19642 sqlite3_stmt *pReplace = 0; 19643 int rc = fts5StorageGetStmt(p, FTS5_STMT_REPLACE_CONFIG, &pReplace, 0); 19644 if( rc==SQLITE_OK ){ 19645 sqlite3_bind_text(pReplace, 1, z, -1, SQLITE_STATIC); 19646 if( pVal ){ 19647 sqlite3_bind_value(pReplace, 2, pVal); 19648 }else{ 19649 sqlite3_bind_int(pReplace, 2, iVal); 19650 } 19651 sqlite3_step(pReplace); 19652 rc = sqlite3_reset(pReplace); 19653 sqlite3_bind_null(pReplace, 1); 19654 } 19655 if( rc==SQLITE_OK && pVal ){ 19656 int iNew = p->pConfig->iCookie + 1; 19657 rc = sqlite3Fts5IndexSetCookie(p->pIndex, iNew); 19658 if( rc==SQLITE_OK ){ 19659 p->pConfig->iCookie = iNew; 19660 } 19661 } 19662 return rc; 19663 } 19664 19665 #line 1 "fts5_tokenize.c" 19666 /* 19667 ** 2014 May 31 19668 ** 19669 ** The author disclaims copyright to this source code. In place of 19670 ** a legal notice, here is a blessing: 19671 ** 19672 ** May you do good and not evil. 19673 ** May you find forgiveness for yourself and forgive others. 19674 ** May you share freely, never taking more than you give. 
19675 ** 19676 ****************************************************************************** 19677 */ 19678 19679 19680 /* #include "fts5Int.h" */ 19681 19682 /************************************************************************** 19683 ** Start of ascii tokenizer implementation. 19684 */ 19685 19686 /* 19687 ** For tokenizers with no "unicode" modifier, the set of token characters 19688 ** is the same as the set of ASCII range alphanumeric characters. 19689 */ 19690 static unsigned char aAsciiTokenChar[128] = { 19691 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00..0x0F */ 19692 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10..0x1F */ 19693 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20..0x2F */ 19694 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0x30..0x3F */ 19695 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40..0x4F */ 19696 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x50..0x5F */ 19697 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60..0x6F */ 19698 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x70..0x7F */ 19699 }; 19700 19701 typedef struct AsciiTokenizer AsciiTokenizer; 19702 struct AsciiTokenizer { 19703 unsigned char aTokenChar[128]; 19704 }; 19705 19706 static void fts5AsciiAddExceptions( 19707 AsciiTokenizer *p, 19708 const char *zArg, 19709 int bTokenChars 19710 ){ 19711 int i; 19712 for(i=0; zArg[i]; i++){ 19713 if( (zArg[i] & 0x80)==0 ){ 19714 p->aTokenChar[(int)zArg[i]] = (unsigned char)bTokenChars; 19715 } 19716 } 19717 } 19718 19719 /* 19720 ** Delete a "ascii" tokenizer. 19721 */ 19722 static void fts5AsciiDelete(Fts5Tokenizer *p){ 19723 sqlite3_free(p); 19724 } 19725 19726 /* 19727 ** Create an "ascii" tokenizer. 
19728 */ 19729 static int fts5AsciiCreate( 19730 void *pUnused, 19731 const char **azArg, int nArg, 19732 Fts5Tokenizer **ppOut 19733 ){ 19734 int rc = SQLITE_OK; 19735 AsciiTokenizer *p = 0; 19736 UNUSED_PARAM(pUnused); 19737 if( nArg%2 ){ 19738 rc = SQLITE_ERROR; 19739 }else{ 19740 p = sqlite3_malloc(sizeof(AsciiTokenizer)); 19741 if( p==0 ){ 19742 rc = SQLITE_NOMEM; 19743 }else{ 19744 int i; 19745 memset(p, 0, sizeof(AsciiTokenizer)); 19746 memcpy(p->aTokenChar, aAsciiTokenChar, sizeof(aAsciiTokenChar)); 19747 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ 19748 const char *zArg = azArg[i+1]; 19749 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ 19750 fts5AsciiAddExceptions(p, zArg, 1); 19751 }else 19752 if( 0==sqlite3_stricmp(azArg[i], "separators") ){ 19753 fts5AsciiAddExceptions(p, zArg, 0); 19754 }else{ 19755 rc = SQLITE_ERROR; 19756 } 19757 } 19758 if( rc!=SQLITE_OK ){ 19759 fts5AsciiDelete((Fts5Tokenizer*)p); 19760 p = 0; 19761 } 19762 } 19763 } 19764 19765 *ppOut = (Fts5Tokenizer*)p; 19766 return rc; 19767 } 19768 19769 19770 static void asciiFold(char *aOut, const char *aIn, int nByte){ 19771 int i; 19772 for(i=0; i<nByte; i++){ 19773 char c = aIn[i]; 19774 if( c>='A' && c<='Z' ) c += 32; 19775 aOut[i] = c; 19776 } 19777 } 19778 19779 /* 19780 ** Tokenize some text using the ascii tokenizer. 19781 */ 19782 static int fts5AsciiTokenize( 19783 Fts5Tokenizer *pTokenizer, 19784 void *pCtx, 19785 int iUnused, 19786 const char *pText, int nText, 19787 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) 19788 ){ 19789 AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer; 19790 int rc = SQLITE_OK; 19791 int ie; 19792 int is = 0; 19793 19794 char aFold[64]; 19795 int nFold = sizeof(aFold); 19796 char *pFold = aFold; 19797 unsigned char *a = p->aTokenChar; 19798 19799 UNUSED_PARAM(iUnused); 19800 19801 while( is<nText && rc==SQLITE_OK ){ 19802 int nByte; 19803 19804 /* Skip any leading divider characters. 
*/ 19805 while( is<nText && ((pText[is]&0x80)==0 && a[(int)pText[is]]==0) ){ 19806 is++; 19807 } 19808 if( is==nText ) break; 19809 19810 /* Count the token characters */ 19811 ie = is+1; 19812 while( ie<nText && ((pText[ie]&0x80) || a[(int)pText[ie]] ) ){ 19813 ie++; 19814 } 19815 19816 /* Fold to lower case */ 19817 nByte = ie-is; 19818 if( nByte>nFold ){ 19819 if( pFold!=aFold ) sqlite3_free(pFold); 19820 pFold = sqlite3_malloc64((sqlite3_int64)nByte*2); 19821 if( pFold==0 ){ 19822 rc = SQLITE_NOMEM; 19823 break; 19824 } 19825 nFold = nByte*2; 19826 } 19827 asciiFold(pFold, &pText[is], nByte); 19828 19829 /* Invoke the token callback */ 19830 rc = xToken(pCtx, 0, pFold, nByte, is, ie); 19831 is = ie+1; 19832 } 19833 19834 if( pFold!=aFold ) sqlite3_free(pFold); 19835 if( rc==SQLITE_DONE ) rc = SQLITE_OK; 19836 return rc; 19837 } 19838 19839 /************************************************************************** 19840 ** Start of unicode61 tokenizer implementation. 19841 */ 19842 19843 19844 /* 19845 ** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied 19846 ** from the sqlite3 source file utf.c. If this file is compiled as part 19847 ** of the amalgamation, they are not required. 
19848 */ 19849 #ifndef SQLITE_AMALGAMATION 19850 19851 static const unsigned char sqlite3Utf8Trans1[] = { 19852 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 19853 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 19854 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 19855 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 19856 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 19857 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 19858 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 19859 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, 19860 }; 19861 19862 #define READ_UTF8(zIn, zTerm, c) \ 19863 c = *(zIn++); \ 19864 if( c>=0xc0 ){ \ 19865 c = sqlite3Utf8Trans1[c-0xc0]; \ 19866 while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ 19867 c = (c<<6) + (0x3f & *(zIn++)); \ 19868 } \ 19869 if( c<0x80 \ 19870 || (c&0xFFFFF800)==0xD800 \ 19871 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ 19872 } 19873 19874 19875 #define WRITE_UTF8(zOut, c) { \ 19876 if( c<0x00080 ){ \ 19877 *zOut++ = (unsigned char)(c&0xFF); \ 19878 } \ 19879 else if( c<0x00800 ){ \ 19880 *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F); \ 19881 *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ 19882 } \ 19883 else if( c<0x10000 ){ \ 19884 *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F); \ 19885 *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \ 19886 *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ 19887 }else{ \ 19888 *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07); \ 19889 *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F); \ 19890 *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \ 19891 *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \ 19892 } \ 19893 } 19894 19895 #endif /* ifndef SQLITE_AMALGAMATION */ 19896 19897 typedef struct Unicode61Tokenizer Unicode61Tokenizer; 19898 struct Unicode61Tokenizer { 19899 unsigned char aTokenChar[128]; /* ASCII range token characters */ 19900 char *aFold; /* Buffer to fold text into */ 19901 int nFold; /* Size of aFold[] in bytes */ 19902 int eRemoveDiacritic; /* True if 
remove_diacritics=1 is set */ 19903 int nException; 19904 int *aiException; 19905 19906 unsigned char aCategory[32]; /* True for token char categories */ 19907 }; 19908 19909 /* Values for eRemoveDiacritic (must match internals of fts5_unicode2.c) */ 19910 #define FTS5_REMOVE_DIACRITICS_NONE 0 19911 #define FTS5_REMOVE_DIACRITICS_SIMPLE 1 19912 #define FTS5_REMOVE_DIACRITICS_COMPLEX 2 19913 19914 static int fts5UnicodeAddExceptions( 19915 Unicode61Tokenizer *p, /* Tokenizer object */ 19916 const char *z, /* Characters to treat as exceptions */ 19917 int bTokenChars /* 1 for 'tokenchars', 0 for 'separators' */ 19918 ){ 19919 int rc = SQLITE_OK; 19920 int n = (int)strlen(z); 19921 int *aNew; 19922 19923 if( n>0 ){ 19924 aNew = (int*)sqlite3_realloc64(p->aiException, 19925 (n+p->nException)*sizeof(int)); 19926 if( aNew ){ 19927 int nNew = p->nException; 19928 const unsigned char *zCsr = (const unsigned char*)z; 19929 const unsigned char *zTerm = (const unsigned char*)&z[n]; 19930 while( zCsr<zTerm ){ 19931 u32 iCode; 19932 int bToken; 19933 READ_UTF8(zCsr, zTerm, iCode); 19934 if( iCode<128 ){ 19935 p->aTokenChar[iCode] = (unsigned char)bTokenChars; 19936 }else{ 19937 bToken = p->aCategory[sqlite3Fts5UnicodeCategory(iCode)]; 19938 assert( (bToken==0 || bToken==1) ); 19939 assert( (bTokenChars==0 || bTokenChars==1) ); 19940 if( bToken!=bTokenChars && sqlite3Fts5UnicodeIsdiacritic(iCode)==0 ){ 19941 int i; 19942 for(i=0; i<nNew; i++){ 19943 if( (u32)aNew[i]>iCode ) break; 19944 } 19945 memmove(&aNew[i+1], &aNew[i], (nNew-i)*sizeof(int)); 19946 aNew[i] = iCode; 19947 nNew++; 19948 } 19949 } 19950 } 19951 p->aiException = aNew; 19952 p->nException = nNew; 19953 }else{ 19954 rc = SQLITE_NOMEM; 19955 } 19956 } 19957 19958 return rc; 19959 } 19960 19961 /* 19962 ** Return true if the p->aiException[] array contains the value iCode. 
19963 */ 19964 static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){ 19965 if( p->nException>0 ){ 19966 int *a = p->aiException; 19967 int iLo = 0; 19968 int iHi = p->nException-1; 19969 19970 while( iHi>=iLo ){ 19971 int iTest = (iHi + iLo) / 2; 19972 if( iCode==a[iTest] ){ 19973 return 1; 19974 }else if( iCode>a[iTest] ){ 19975 iLo = iTest+1; 19976 }else{ 19977 iHi = iTest-1; 19978 } 19979 } 19980 } 19981 19982 return 0; 19983 } 19984 19985 /* 19986 ** Delete a "unicode61" tokenizer. 19987 */ 19988 static void fts5UnicodeDelete(Fts5Tokenizer *pTok){ 19989 if( pTok ){ 19990 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok; 19991 sqlite3_free(p->aiException); 19992 sqlite3_free(p->aFold); 19993 sqlite3_free(p); 19994 } 19995 return; 19996 } 19997 19998 static int unicodeSetCategories(Unicode61Tokenizer *p, const char *zCat){ 19999 const char *z = zCat; 20000 20001 while( *z ){ 20002 while( *z==' ' || *z=='\t' ) z++; 20003 if( *z && sqlite3Fts5UnicodeCatParse(z, p->aCategory) ){ 20004 return SQLITE_ERROR; 20005 } 20006 while( *z!=' ' && *z!='\t' && *z!='\0' ) z++; 20007 } 20008 20009 sqlite3Fts5UnicodeAscii(p->aCategory, p->aTokenChar); 20010 return SQLITE_OK; 20011 } 20012 20013 /* 20014 ** Create a "unicode61" tokenizer. 
20015 */ 20016 static int fts5UnicodeCreate( 20017 void *pUnused, 20018 const char **azArg, int nArg, 20019 Fts5Tokenizer **ppOut 20020 ){ 20021 int rc = SQLITE_OK; /* Return code */ 20022 Unicode61Tokenizer *p = 0; /* New tokenizer object */ 20023 20024 UNUSED_PARAM(pUnused); 20025 20026 if( nArg%2 ){ 20027 rc = SQLITE_ERROR; 20028 }else{ 20029 p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer)); 20030 if( p ){ 20031 const char *zCat = "L* N* Co"; 20032 int i; 20033 memset(p, 0, sizeof(Unicode61Tokenizer)); 20034 20035 p->eRemoveDiacritic = FTS5_REMOVE_DIACRITICS_SIMPLE; 20036 p->nFold = 64; 20037 p->aFold = sqlite3_malloc64(p->nFold * sizeof(char)); 20038 if( p->aFold==0 ){ 20039 rc = SQLITE_NOMEM; 20040 } 20041 20042 /* Search for a "categories" argument */ 20043 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ 20044 if( 0==sqlite3_stricmp(azArg[i], "categories") ){ 20045 zCat = azArg[i+1]; 20046 } 20047 } 20048 20049 if( rc==SQLITE_OK ){ 20050 rc = unicodeSetCategories(p, zCat); 20051 } 20052 20053 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ 20054 const char *zArg = azArg[i+1]; 20055 if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ 20056 if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ 20057 rc = SQLITE_ERROR; 20058 }else{ 20059 p->eRemoveDiacritic = (zArg[0] - '0'); 20060 assert( p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_NONE 20061 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_SIMPLE 20062 || p->eRemoveDiacritic==FTS5_REMOVE_DIACRITICS_COMPLEX 20063 ); 20064 } 20065 }else 20066 if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){ 20067 rc = fts5UnicodeAddExceptions(p, zArg, 1); 20068 }else 20069 if( 0==sqlite3_stricmp(azArg[i], "separators") ){ 20070 rc = fts5UnicodeAddExceptions(p, zArg, 0); 20071 }else 20072 if( 0==sqlite3_stricmp(azArg[i], "categories") ){ 20073 /* no-op */ 20074 }else{ 20075 rc = SQLITE_ERROR; 20076 } 20077 } 20078 20079 }else{ 20080 rc = SQLITE_NOMEM; 20081 } 20082 if( rc!=SQLITE_OK ){ 20083 
fts5UnicodeDelete((Fts5Tokenizer*)p); 20084 p = 0; 20085 } 20086 *ppOut = (Fts5Tokenizer*)p; 20087 } 20088 return rc; 20089 } 20090 20091 /* 20092 ** Return true if, for the purposes of tokenizing with the tokenizer 20093 ** passed as the first argument, codepoint iCode is considered a token 20094 ** character (not a separator). 20095 */ 20096 static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){ 20097 return ( 20098 p->aCategory[sqlite3Fts5UnicodeCategory((u32)iCode)] 20099 ^ fts5UnicodeIsException(p, iCode) 20100 ); 20101 } 20102 20103 static int fts5UnicodeTokenize( 20104 Fts5Tokenizer *pTokenizer, 20105 void *pCtx, 20106 int iUnused, 20107 const char *pText, int nText, 20108 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) 20109 ){ 20110 Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer; 20111 int rc = SQLITE_OK; 20112 unsigned char *a = p->aTokenChar; 20113 20114 unsigned char *zTerm = (unsigned char*)&pText[nText]; 20115 unsigned char *zCsr = (unsigned char *)pText; 20116 20117 /* Output buffer */ 20118 char *aFold = p->aFold; 20119 int nFold = p->nFold; 20120 const char *pEnd = &aFold[nFold-6]; 20121 20122 UNUSED_PARAM(iUnused); 20123 20124 /* Each iteration of this loop gobbles up a contiguous run of separators, 20125 ** then the next token. */ 20126 while( rc==SQLITE_OK ){ 20127 u32 iCode; /* non-ASCII codepoint read from input */ 20128 char *zOut = aFold; 20129 int is; 20130 int ie; 20131 20132 /* Skip any separator characters. */ 20133 while( 1 ){ 20134 if( zCsr>=zTerm ) goto tokenize_done; 20135 if( *zCsr & 0x80 ) { 20136 /* A character outside of the ascii range. Skip past it if it is 20137 ** a separator character. Or break out of the loop if it is not. 
*/ 20138 is = zCsr - (unsigned char*)pText; 20139 READ_UTF8(zCsr, zTerm, iCode); 20140 if( fts5UnicodeIsAlnum(p, iCode) ){ 20141 goto non_ascii_tokenchar; 20142 } 20143 }else{ 20144 if( a[*zCsr] ){ 20145 is = zCsr - (unsigned char*)pText; 20146 goto ascii_tokenchar; 20147 } 20148 zCsr++; 20149 } 20150 } 20151 20152 /* Run through the tokenchars. Fold them into the output buffer along 20153 ** the way. */ 20154 while( zCsr<zTerm ){ 20155 20156 /* Grow the output buffer so that there is sufficient space to fit the 20157 ** largest possible utf-8 character. */ 20158 if( zOut>pEnd ){ 20159 aFold = sqlite3_malloc64((sqlite3_int64)nFold*2); 20160 if( aFold==0 ){ 20161 rc = SQLITE_NOMEM; 20162 goto tokenize_done; 20163 } 20164 zOut = &aFold[zOut - p->aFold]; 20165 memcpy(aFold, p->aFold, nFold); 20166 sqlite3_free(p->aFold); 20167 p->aFold = aFold; 20168 p->nFold = nFold = nFold*2; 20169 pEnd = &aFold[nFold-6]; 20170 } 20171 20172 if( *zCsr & 0x80 ){ 20173 /* An non-ascii-range character. Fold it into the output buffer if 20174 ** it is a token character, or break out of the loop if it is not. */ 20175 READ_UTF8(zCsr, zTerm, iCode); 20176 if( fts5UnicodeIsAlnum(p,iCode)||sqlite3Fts5UnicodeIsdiacritic(iCode) ){ 20177 non_ascii_tokenchar: 20178 iCode = sqlite3Fts5UnicodeFold(iCode, p->eRemoveDiacritic); 20179 if( iCode ) WRITE_UTF8(zOut, iCode); 20180 }else{ 20181 break; 20182 } 20183 }else if( a[*zCsr]==0 ){ 20184 /* An ascii-range separator character. End of token. 
*/ 20185 break; 20186 }else{ 20187 ascii_tokenchar: 20188 if( *zCsr>='A' && *zCsr<='Z' ){ 20189 *zOut++ = *zCsr + 32; 20190 }else{ 20191 *zOut++ = *zCsr; 20192 } 20193 zCsr++; 20194 } 20195 ie = zCsr - (unsigned char*)pText; 20196 } 20197 20198 /* Invoke the token callback */ 20199 rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie); 20200 } 20201 20202 tokenize_done: 20203 if( rc==SQLITE_DONE ) rc = SQLITE_OK; 20204 return rc; 20205 } 20206 20207 /************************************************************************** 20208 ** Start of porter stemmer implementation. 20209 */ 20210 20211 /* Any tokens larger than this (in bytes) are passed through without 20212 ** stemming. */ 20213 #define FTS5_PORTER_MAX_TOKEN 64 20214 20215 typedef struct PorterTokenizer PorterTokenizer; 20216 struct PorterTokenizer { 20217 fts5_tokenizer tokenizer; /* Parent tokenizer module */ 20218 Fts5Tokenizer *pTokenizer; /* Parent tokenizer instance */ 20219 char aBuf[FTS5_PORTER_MAX_TOKEN + 64]; 20220 }; 20221 20222 /* 20223 ** Delete a "porter" tokenizer. 20224 */ 20225 static void fts5PorterDelete(Fts5Tokenizer *pTok){ 20226 if( pTok ){ 20227 PorterTokenizer *p = (PorterTokenizer*)pTok; 20228 if( p->pTokenizer ){ 20229 p->tokenizer.xDelete(p->pTokenizer); 20230 } 20231 sqlite3_free(p); 20232 } 20233 } 20234 20235 /* 20236 ** Create a "porter" tokenizer. 
20237 */ 20238 static int fts5PorterCreate( 20239 void *pCtx, 20240 const char **azArg, int nArg, 20241 Fts5Tokenizer **ppOut 20242 ){ 20243 fts5_api *pApi = (fts5_api*)pCtx; 20244 int rc = SQLITE_OK; 20245 PorterTokenizer *pRet; 20246 void *pUserdata = 0; 20247 const char *zBase = "unicode61"; 20248 20249 if( nArg>0 ){ 20250 zBase = azArg[0]; 20251 } 20252 20253 pRet = (PorterTokenizer*)sqlite3_malloc(sizeof(PorterTokenizer)); 20254 if( pRet ){ 20255 memset(pRet, 0, sizeof(PorterTokenizer)); 20256 rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer); 20257 }else{ 20258 rc = SQLITE_NOMEM; 20259 } 20260 if( rc==SQLITE_OK ){ 20261 int nArg2 = (nArg>0 ? nArg-1 : 0); 20262 const char **azArg2 = (nArg2 ? &azArg[1] : 0); 20263 rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer); 20264 } 20265 20266 if( rc!=SQLITE_OK ){ 20267 fts5PorterDelete((Fts5Tokenizer*)pRet); 20268 pRet = 0; 20269 } 20270 *ppOut = (Fts5Tokenizer*)pRet; 20271 return rc; 20272 } 20273 20274 typedef struct PorterContext PorterContext; 20275 struct PorterContext { 20276 void *pCtx; 20277 int (*xToken)(void*, int, const char*, int, int, int); 20278 char *aBuf; 20279 }; 20280 20281 typedef struct PorterRule PorterRule; 20282 struct PorterRule { 20283 const char *zSuffix; 20284 int nSuffix; 20285 int (*xCond)(char *zStem, int nStem); 20286 const char *zOutput; 20287 int nOutput; 20288 }; 20289 20290 #if 0 20291 static int fts5PorterApply(char *aBuf, int *pnBuf, PorterRule *aRule){ 20292 int ret = -1; 20293 int nBuf = *pnBuf; 20294 PorterRule *p; 20295 20296 for(p=aRule; p->zSuffix; p++){ 20297 assert( strlen(p->zSuffix)==p->nSuffix ); 20298 assert( strlen(p->zOutput)==p->nOutput ); 20299 if( nBuf<p->nSuffix ) continue; 20300 if( 0==memcmp(&aBuf[nBuf - p->nSuffix], p->zSuffix, p->nSuffix) ) break; 20301 } 20302 20303 if( p->zSuffix ){ 20304 int nStem = nBuf - p->nSuffix; 20305 if( p->xCond==0 || p->xCond(aBuf, nStem) ){ 20306 memcpy(&aBuf[nStem], p->zOutput, 
p->nOutput); 20307 *pnBuf = nStem + p->nOutput; 20308 ret = p - aRule; 20309 } 20310 } 20311 20312 return ret; 20313 } 20314 #endif 20315 20316 static int fts5PorterIsVowel(char c, int bYIsVowel){ 20317 return ( 20318 c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || (bYIsVowel && c=='y') 20319 ); 20320 } 20321 20322 static int fts5PorterGobbleVC(char *zStem, int nStem, int bPrevCons){ 20323 int i; 20324 int bCons = bPrevCons; 20325 20326 /* Scan for a vowel */ 20327 for(i=0; i<nStem; i++){ 20328 if( 0==(bCons = !fts5PorterIsVowel(zStem[i], bCons)) ) break; 20329 } 20330 20331 /* Scan for a consonent */ 20332 for(i++; i<nStem; i++){ 20333 if( (bCons = !fts5PorterIsVowel(zStem[i], bCons)) ) return i+1; 20334 } 20335 return 0; 20336 } 20337 20338 /* porter rule condition: (m > 0) */ 20339 static int fts5Porter_MGt0(char *zStem, int nStem){ 20340 return !!fts5PorterGobbleVC(zStem, nStem, 0); 20341 } 20342 20343 /* porter rule condition: (m > 1) */ 20344 static int fts5Porter_MGt1(char *zStem, int nStem){ 20345 int n; 20346 n = fts5PorterGobbleVC(zStem, nStem, 0); 20347 if( n && fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){ 20348 return 1; 20349 } 20350 return 0; 20351 } 20352 20353 /* porter rule condition: (m = 1) */ 20354 static int fts5Porter_MEq1(char *zStem, int nStem){ 20355 int n; 20356 n = fts5PorterGobbleVC(zStem, nStem, 0); 20357 if( n && 0==fts5PorterGobbleVC(&zStem[n], nStem-n, 1) ){ 20358 return 1; 20359 } 20360 return 0; 20361 } 20362 20363 /* porter rule condition: (*o) */ 20364 static int fts5Porter_Ostar(char *zStem, int nStem){ 20365 if( zStem[nStem-1]=='w' || zStem[nStem-1]=='x' || zStem[nStem-1]=='y' ){ 20366 return 0; 20367 }else{ 20368 int i; 20369 int mask = 0; 20370 int bCons = 0; 20371 for(i=0; i<nStem; i++){ 20372 bCons = !fts5PorterIsVowel(zStem[i], bCons); 20373 assert( bCons==0 || bCons==1 ); 20374 mask = (mask << 1) + bCons; 20375 } 20376 return ((mask & 0x0007)==0x0005); 20377 } 20378 } 20379 20380 /* porter rule condition: (m > 1 and (*S 
or *T)) */
static int fts5Porter_MGt1_and_S_or_T(char *zStem, int nStem){
  assert( nStem>0 );
  return (zStem[nStem-1]=='s' || zStem[nStem-1]=='t')
      && fts5Porter_MGt1(zStem, nStem);
}

/* porter rule condition: (*v*)
**
** True if the stem contains a vowel. 'y' is treated as a vowel anywhere
** except at the very start of the stem.
*/
static int fts5Porter_Vowel(char *zStem, int nStem){
  int i;
  for(i=0; i<nStem; i++){
    if( fts5PorterIsVowel(zStem[i], i>0) ){
      return 1;
    }
  }
  return 0;
}


/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/

/*
** Step 4 of the stemmer. aBuf[] holds the current *pnBuf byte word. If
** the word ends with one of the suffixes below and the remaining stem
** satisfies the rule's condition, the suffix is removed by reducing
** *pnBuf. Rules are selected by the second-to-last character; within a
** group the first suffix that matches is the only one considered.
**
** Always returns 0.
*/
static int fts5PorterStep4(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;

  /* Earlier steps may leave a single-character word. There is then no
  ** second-to-last character to dispatch on, and no rule can match. */
  if( nBuf<2 ) return ret;

  switch( aBuf[nBuf-2] ){

    case 'a':
      if( nBuf>2 && 0==memcmp("al", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'c':
      if( nBuf>4 && 0==memcmp("ance", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ence", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'e':
      if( nBuf>2 && 0==memcmp("er", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'i':
      if( nBuf>2 && 0==memcmp("ic", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 'l':
      if( nBuf>4 && 0==memcmp("able", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>4 && 0==memcmp("ible", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }
      break;

    case 'n':
      if( nBuf>3 && 0==memcmp("ant", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>5 && 0==memcmp("ement", &aBuf[nBuf-5], 5) ){
        if( fts5Porter_MGt1(aBuf, nBuf-5) ){
          *pnBuf = nBuf - 5;
        }
      }else if( nBuf>4 && 0==memcmp("ment", &aBuf[nBuf-4], 4) ){
        if( fts5Porter_MGt1(aBuf, nBuf-4) ){
          *pnBuf = nBuf - 4;
        }
      }else if( nBuf>3 && 0==memcmp("ent", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'o':
      if( nBuf>3 && 0==memcmp("ion", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1_and_S_or_T(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>2 && 0==memcmp("ou", &aBuf[nBuf-2], 2) ){
        if( fts5Porter_MGt1(aBuf, nBuf-2) ){
          *pnBuf = nBuf - 2;
        }
      }
      break;

    case 's':
      if( nBuf>3 && 0==memcmp("ism", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 't':
      if( nBuf>3 && 0==memcmp("ate", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }else if( nBuf>3 && 0==memcmp("iti", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'u':
      if( nBuf>3 && 0==memcmp("ous", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'v':
      if( nBuf>3 && 0==memcmp("ive", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

    case 'z':
      if( nBuf>3 && 0==memcmp("ize", &aBuf[nBuf-3], 3) ){
        if( fts5Porter_MGt1(aBuf, nBuf-3) ){
          *pnBuf = nBuf - 3;
        }
      }
      break;

  }
  return ret;
}
20536 20537 20538 static int fts5PorterStep1B2(char *aBuf, int *pnBuf){ 20539 int ret = 0; 20540 int nBuf = *pnBuf; 20541 switch( aBuf[nBuf-2] ){ 20542 20543 case 'a': 20544 if( nBuf>2 && 0==memcmp("at", &aBuf[nBuf-2], 2) ){ 20545 memcpy(&aBuf[nBuf-2], "ate", 3); 20546 *pnBuf = nBuf - 2 + 3; 20547 ret = 1; 20548 } 20549 break; 20550 20551 case 'b': 20552 if( nBuf>2 && 0==memcmp("bl", &aBuf[nBuf-2], 2) ){ 20553 memcpy(&aBuf[nBuf-2], "ble", 3); 20554 *pnBuf = nBuf - 2 + 3; 20555 ret = 1; 20556 } 20557 break; 20558 20559 case 'i': 20560 if( nBuf>2 && 0==memcmp("iz", &aBuf[nBuf-2], 2) ){ 20561 memcpy(&aBuf[nBuf-2], "ize", 3); 20562 *pnBuf = nBuf - 2 + 3; 20563 ret = 1; 20564 } 20565 break; 20566 20567 } 20568 return ret; 20569 } 20570 20571 20572 static int fts5PorterStep2(char *aBuf, int *pnBuf){ 20573 int ret = 0; 20574 int nBuf = *pnBuf; 20575 switch( aBuf[nBuf-2] ){ 20576 20577 case 'a': 20578 if( nBuf>7 && 0==memcmp("ational", &aBuf[nBuf-7], 7) ){ 20579 if( fts5Porter_MGt0(aBuf, nBuf-7) ){ 20580 memcpy(&aBuf[nBuf-7], "ate", 3); 20581 *pnBuf = nBuf - 7 + 3; 20582 } 20583 }else if( nBuf>6 && 0==memcmp("tional", &aBuf[nBuf-6], 6) ){ 20584 if( fts5Porter_MGt0(aBuf, nBuf-6) ){ 20585 memcpy(&aBuf[nBuf-6], "tion", 4); 20586 *pnBuf = nBuf - 6 + 4; 20587 } 20588 } 20589 break; 20590 20591 case 'c': 20592 if( nBuf>4 && 0==memcmp("enci", &aBuf[nBuf-4], 4) ){ 20593 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20594 memcpy(&aBuf[nBuf-4], "ence", 4); 20595 *pnBuf = nBuf - 4 + 4; 20596 } 20597 }else if( nBuf>4 && 0==memcmp("anci", &aBuf[nBuf-4], 4) ){ 20598 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20599 memcpy(&aBuf[nBuf-4], "ance", 4); 20600 *pnBuf = nBuf - 4 + 4; 20601 } 20602 } 20603 break; 20604 20605 case 'e': 20606 if( nBuf>4 && 0==memcmp("izer", &aBuf[nBuf-4], 4) ){ 20607 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20608 memcpy(&aBuf[nBuf-4], "ize", 3); 20609 *pnBuf = nBuf - 4 + 3; 20610 } 20611 } 20612 break; 20613 20614 case 'g': 20615 if( nBuf>4 && 0==memcmp("logi", &aBuf[nBuf-4], 4) ){ 
20616 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20617 memcpy(&aBuf[nBuf-4], "log", 3); 20618 *pnBuf = nBuf - 4 + 3; 20619 } 20620 } 20621 break; 20622 20623 case 'l': 20624 if( nBuf>3 && 0==memcmp("bli", &aBuf[nBuf-3], 3) ){ 20625 if( fts5Porter_MGt0(aBuf, nBuf-3) ){ 20626 memcpy(&aBuf[nBuf-3], "ble", 3); 20627 *pnBuf = nBuf - 3 + 3; 20628 } 20629 }else if( nBuf>4 && 0==memcmp("alli", &aBuf[nBuf-4], 4) ){ 20630 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20631 memcpy(&aBuf[nBuf-4], "al", 2); 20632 *pnBuf = nBuf - 4 + 2; 20633 } 20634 }else if( nBuf>5 && 0==memcmp("entli", &aBuf[nBuf-5], 5) ){ 20635 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20636 memcpy(&aBuf[nBuf-5], "ent", 3); 20637 *pnBuf = nBuf - 5 + 3; 20638 } 20639 }else if( nBuf>3 && 0==memcmp("eli", &aBuf[nBuf-3], 3) ){ 20640 if( fts5Porter_MGt0(aBuf, nBuf-3) ){ 20641 memcpy(&aBuf[nBuf-3], "e", 1); 20642 *pnBuf = nBuf - 3 + 1; 20643 } 20644 }else if( nBuf>5 && 0==memcmp("ousli", &aBuf[nBuf-5], 5) ){ 20645 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20646 memcpy(&aBuf[nBuf-5], "ous", 3); 20647 *pnBuf = nBuf - 5 + 3; 20648 } 20649 } 20650 break; 20651 20652 case 'o': 20653 if( nBuf>7 && 0==memcmp("ization", &aBuf[nBuf-7], 7) ){ 20654 if( fts5Porter_MGt0(aBuf, nBuf-7) ){ 20655 memcpy(&aBuf[nBuf-7], "ize", 3); 20656 *pnBuf = nBuf - 7 + 3; 20657 } 20658 }else if( nBuf>5 && 0==memcmp("ation", &aBuf[nBuf-5], 5) ){ 20659 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20660 memcpy(&aBuf[nBuf-5], "ate", 3); 20661 *pnBuf = nBuf - 5 + 3; 20662 } 20663 }else if( nBuf>4 && 0==memcmp("ator", &aBuf[nBuf-4], 4) ){ 20664 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20665 memcpy(&aBuf[nBuf-4], "ate", 3); 20666 *pnBuf = nBuf - 4 + 3; 20667 } 20668 } 20669 break; 20670 20671 case 's': 20672 if( nBuf>5 && 0==memcmp("alism", &aBuf[nBuf-5], 5) ){ 20673 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20674 memcpy(&aBuf[nBuf-5], "al", 2); 20675 *pnBuf = nBuf - 5 + 2; 20676 } 20677 }else if( nBuf>7 && 0==memcmp("iveness", &aBuf[nBuf-7], 7) ){ 20678 if( fts5Porter_MGt0(aBuf, nBuf-7) ){ 
20679 memcpy(&aBuf[nBuf-7], "ive", 3); 20680 *pnBuf = nBuf - 7 + 3; 20681 } 20682 }else if( nBuf>7 && 0==memcmp("fulness", &aBuf[nBuf-7], 7) ){ 20683 if( fts5Porter_MGt0(aBuf, nBuf-7) ){ 20684 memcpy(&aBuf[nBuf-7], "ful", 3); 20685 *pnBuf = nBuf - 7 + 3; 20686 } 20687 }else if( nBuf>7 && 0==memcmp("ousness", &aBuf[nBuf-7], 7) ){ 20688 if( fts5Porter_MGt0(aBuf, nBuf-7) ){ 20689 memcpy(&aBuf[nBuf-7], "ous", 3); 20690 *pnBuf = nBuf - 7 + 3; 20691 } 20692 } 20693 break; 20694 20695 case 't': 20696 if( nBuf>5 && 0==memcmp("aliti", &aBuf[nBuf-5], 5) ){ 20697 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20698 memcpy(&aBuf[nBuf-5], "al", 2); 20699 *pnBuf = nBuf - 5 + 2; 20700 } 20701 }else if( nBuf>5 && 0==memcmp("iviti", &aBuf[nBuf-5], 5) ){ 20702 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20703 memcpy(&aBuf[nBuf-5], "ive", 3); 20704 *pnBuf = nBuf - 5 + 3; 20705 } 20706 }else if( nBuf>6 && 0==memcmp("biliti", &aBuf[nBuf-6], 6) ){ 20707 if( fts5Porter_MGt0(aBuf, nBuf-6) ){ 20708 memcpy(&aBuf[nBuf-6], "ble", 3); 20709 *pnBuf = nBuf - 6 + 3; 20710 } 20711 } 20712 break; 20713 20714 } 20715 return ret; 20716 } 20717 20718 20719 static int fts5PorterStep3(char *aBuf, int *pnBuf){ 20720 int ret = 0; 20721 int nBuf = *pnBuf; 20722 switch( aBuf[nBuf-2] ){ 20723 20724 case 'a': 20725 if( nBuf>4 && 0==memcmp("ical", &aBuf[nBuf-4], 4) ){ 20726 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20727 memcpy(&aBuf[nBuf-4], "ic", 2); 20728 *pnBuf = nBuf - 4 + 2; 20729 } 20730 } 20731 break; 20732 20733 case 's': 20734 if( nBuf>4 && 0==memcmp("ness", &aBuf[nBuf-4], 4) ){ 20735 if( fts5Porter_MGt0(aBuf, nBuf-4) ){ 20736 *pnBuf = nBuf - 4; 20737 } 20738 } 20739 break; 20740 20741 case 't': 20742 if( nBuf>5 && 0==memcmp("icate", &aBuf[nBuf-5], 5) ){ 20743 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20744 memcpy(&aBuf[nBuf-5], "ic", 2); 20745 *pnBuf = nBuf - 5 + 2; 20746 } 20747 }else if( nBuf>5 && 0==memcmp("iciti", &aBuf[nBuf-5], 5) ){ 20748 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20749 memcpy(&aBuf[nBuf-5], "ic", 2); 20750 
*pnBuf = nBuf - 5 + 2; 20751 } 20752 } 20753 break; 20754 20755 case 'u': 20756 if( nBuf>3 && 0==memcmp("ful", &aBuf[nBuf-3], 3) ){ 20757 if( fts5Porter_MGt0(aBuf, nBuf-3) ){ 20758 *pnBuf = nBuf - 3; 20759 } 20760 } 20761 break; 20762 20763 case 'v': 20764 if( nBuf>5 && 0==memcmp("ative", &aBuf[nBuf-5], 5) ){ 20765 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20766 *pnBuf = nBuf - 5; 20767 } 20768 } 20769 break; 20770 20771 case 'z': 20772 if( nBuf>5 && 0==memcmp("alize", &aBuf[nBuf-5], 5) ){ 20773 if( fts5Porter_MGt0(aBuf, nBuf-5) ){ 20774 memcpy(&aBuf[nBuf-5], "al", 2); 20775 *pnBuf = nBuf - 5 + 2; 20776 } 20777 } 20778 break; 20779 20780 } 20781 return ret; 20782 } 20783 20784 20785 static int fts5PorterStep1B(char *aBuf, int *pnBuf){ 20786 int ret = 0; 20787 int nBuf = *pnBuf; 20788 switch( aBuf[nBuf-2] ){ 20789 20790 case 'e': 20791 if( nBuf>3 && 0==memcmp("eed", &aBuf[nBuf-3], 3) ){ 20792 if( fts5Porter_MGt0(aBuf, nBuf-3) ){ 20793 memcpy(&aBuf[nBuf-3], "ee", 2); 20794 *pnBuf = nBuf - 3 + 2; 20795 } 20796 }else if( nBuf>2 && 0==memcmp("ed", &aBuf[nBuf-2], 2) ){ 20797 if( fts5Porter_Vowel(aBuf, nBuf-2) ){ 20798 *pnBuf = nBuf - 2; 20799 ret = 1; 20800 } 20801 } 20802 break; 20803 20804 case 'n': 20805 if( nBuf>3 && 0==memcmp("ing", &aBuf[nBuf-3], 3) ){ 20806 if( fts5Porter_Vowel(aBuf, nBuf-3) ){ 20807 *pnBuf = nBuf - 3; 20808 ret = 1; 20809 } 20810 } 20811 break; 20812 20813 } 20814 return ret; 20815 } 20816 20817 /* 20818 ** GENERATED CODE ENDS HERE (mkportersteps.tcl) 20819 *************************************************************************** 20820 **************************************************************************/ 20821 20822 static void fts5PorterStep1A(char *aBuf, int *pnBuf){ 20823 int nBuf = *pnBuf; 20824 if( aBuf[nBuf-1]=='s' ){ 20825 if( aBuf[nBuf-2]=='e' ){ 20826 if( (nBuf>4 && aBuf[nBuf-4]=='s' && aBuf[nBuf-3]=='s') 20827 || (nBuf>3 && aBuf[nBuf-3]=='i' ) 20828 ){ 20829 *pnBuf = nBuf-2; 20830 }else{ 20831 *pnBuf = nBuf-1; 20832 } 20833 } 20834 
else if( aBuf[nBuf-2]!='s' ){ 20835 *pnBuf = nBuf-1; 20836 } 20837 } 20838 } 20839 20840 static int fts5PorterCb( 20841 void *pCtx, 20842 int tflags, 20843 const char *pToken, 20844 int nToken, 20845 int iStart, 20846 int iEnd 20847 ){ 20848 PorterContext *p = (PorterContext*)pCtx; 20849 20850 char *aBuf; 20851 int nBuf; 20852 20853 if( nToken>FTS5_PORTER_MAX_TOKEN || nToken<3 ) goto pass_through; 20854 aBuf = p->aBuf; 20855 nBuf = nToken; 20856 memcpy(aBuf, pToken, nBuf); 20857 20858 /* Step 1. */ 20859 fts5PorterStep1A(aBuf, &nBuf); 20860 if( fts5PorterStep1B(aBuf, &nBuf) ){ 20861 if( fts5PorterStep1B2(aBuf, &nBuf)==0 ){ 20862 char c = aBuf[nBuf-1]; 20863 if( fts5PorterIsVowel(c, 0)==0 20864 && c!='l' && c!='s' && c!='z' && c==aBuf[nBuf-2] 20865 ){ 20866 nBuf--; 20867 }else if( fts5Porter_MEq1(aBuf, nBuf) && fts5Porter_Ostar(aBuf, nBuf) ){ 20868 aBuf[nBuf++] = 'e'; 20869 } 20870 } 20871 } 20872 20873 /* Step 1C. */ 20874 if( aBuf[nBuf-1]=='y' && fts5Porter_Vowel(aBuf, nBuf-1) ){ 20875 aBuf[nBuf-1] = 'i'; 20876 } 20877 20878 /* Steps 2 through 4. */ 20879 fts5PorterStep2(aBuf, &nBuf); 20880 fts5PorterStep3(aBuf, &nBuf); 20881 fts5PorterStep4(aBuf, &nBuf); 20882 20883 /* Step 5a. */ 20884 assert( nBuf>0 ); 20885 if( aBuf[nBuf-1]=='e' ){ 20886 if( fts5Porter_MGt1(aBuf, nBuf-1) 20887 || (fts5Porter_MEq1(aBuf, nBuf-1) && !fts5Porter_Ostar(aBuf, nBuf-1)) 20888 ){ 20889 nBuf--; 20890 } 20891 } 20892 20893 /* Step 5b. */ 20894 if( nBuf>1 && aBuf[nBuf-1]=='l' 20895 && aBuf[nBuf-2]=='l' && fts5Porter_MGt1(aBuf, nBuf-1) 20896 ){ 20897 nBuf--; 20898 } 20899 20900 return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd); 20901 20902 pass_through: 20903 return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd); 20904 } 20905 20906 /* 20907 ** Tokenize using the porter tokenizer. 
20908 */ 20909 static int fts5PorterTokenize( 20910 Fts5Tokenizer *pTokenizer, 20911 void *pCtx, 20912 int flags, 20913 const char *pText, int nText, 20914 int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd) 20915 ){ 20916 PorterTokenizer *p = (PorterTokenizer*)pTokenizer; 20917 PorterContext sCtx; 20918 sCtx.xToken = xToken; 20919 sCtx.pCtx = pCtx; 20920 sCtx.aBuf = p->aBuf; 20921 return p->tokenizer.xTokenize( 20922 p->pTokenizer, (void*)&sCtx, flags, pText, nText, fts5PorterCb 20923 ); 20924 } 20925 20926 /************************************************************************** 20927 ** Start of trigram implementation. 20928 */ 20929 typedef struct TrigramTokenizer TrigramTokenizer; 20930 struct TrigramTokenizer { 20931 int bFold; /* True to fold to lower-case */ 20932 }; 20933 20934 /* 20935 ** Free a trigram tokenizer. 20936 */ 20937 static void fts5TriDelete(Fts5Tokenizer *p){ 20938 sqlite3_free(p); 20939 } 20940 20941 /* 20942 ** Allocate a trigram tokenizer. 20943 */ 20944 static int fts5TriCreate( 20945 void *pUnused, 20946 const char **azArg, 20947 int nArg, 20948 Fts5Tokenizer **ppOut 20949 ){ 20950 int rc = SQLITE_OK; 20951 TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew)); 20952 UNUSED_PARAM(pUnused); 20953 if( pNew==0 ){ 20954 rc = SQLITE_NOMEM; 20955 }else{ 20956 int i; 20957 pNew->bFold = 1; 20958 for(i=0; rc==SQLITE_OK && i<nArg; i+=2){ 20959 const char *zArg = azArg[i+1]; 20960 if( 0==sqlite3_stricmp(azArg[i], "case_sensitive") ){ 20961 if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){ 20962 rc = SQLITE_ERROR; 20963 }else{ 20964 pNew->bFold = (zArg[0]=='0'); 20965 } 20966 }else{ 20967 rc = SQLITE_ERROR; 20968 } 20969 } 20970 if( rc!=SQLITE_OK ){ 20971 fts5TriDelete((Fts5Tokenizer*)pNew); 20972 pNew = 0; 20973 } 20974 } 20975 *ppOut = (Fts5Tokenizer*)pNew; 20976 return rc; 20977 } 20978 20979 /* 20980 ** Trigram tokenizer tokenize routine. 
20981 */ 20982 static int fts5TriTokenize( 20983 Fts5Tokenizer *pTok, 20984 void *pCtx, 20985 int unusedFlags, 20986 const char *pText, int nText, 20987 int (*xToken)(void*, int, const char*, int, int, int) 20988 ){ 20989 TrigramTokenizer *p = (TrigramTokenizer*)pTok; 20990 int rc = SQLITE_OK; 20991 char aBuf[32]; 20992 const unsigned char *zIn = (const unsigned char*)pText; 20993 const unsigned char *zEof = &zIn[nText]; 20994 u32 iCode; 20995 20996 UNUSED_PARAM(unusedFlags); 20997 while( 1 ){ 20998 char *zOut = aBuf; 20999 int iStart = zIn - (const unsigned char*)pText; 21000 const unsigned char *zNext; 21001 21002 READ_UTF8(zIn, zEof, iCode); 21003 if( iCode==0 ) break; 21004 zNext = zIn; 21005 if( zIn<zEof ){ 21006 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); 21007 WRITE_UTF8(zOut, iCode); 21008 READ_UTF8(zIn, zEof, iCode); 21009 if( iCode==0 ) break; 21010 }else{ 21011 break; 21012 } 21013 if( zIn<zEof ){ 21014 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); 21015 WRITE_UTF8(zOut, iCode); 21016 READ_UTF8(zIn, zEof, iCode); 21017 if( iCode==0 ) break; 21018 if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, 0); 21019 WRITE_UTF8(zOut, iCode); 21020 }else{ 21021 break; 21022 } 21023 rc = xToken(pCtx, 0, aBuf, zOut-aBuf, iStart, iStart + zOut-aBuf); 21024 if( rc!=SQLITE_OK ) break; 21025 zIn = zNext; 21026 } 21027 21028 return rc; 21029 } 21030 21031 /* 21032 ** Argument xCreate is a pointer to a constructor function for a tokenizer. 21033 ** pTok is a tokenizer previously created using the same method. This function 21034 ** returns one of FTS5_PATTERN_NONE, FTS5_PATTERN_LIKE or FTS5_PATTERN_GLOB 21035 ** indicating the style of pattern matching that the tokenizer can support. 
21036 ** In practice, this is: 21037 ** 21038 ** "trigram" tokenizer, case_sensitive=1 - FTS5_PATTERN_GLOB 21039 ** "trigram" tokenizer, case_sensitive=0 (the default) - FTS5_PATTERN_LIKE 21040 ** all other tokenizers - FTS5_PATTERN_NONE 21041 */ 21042 static int sqlite3Fts5TokenizerPattern( 21043 int (*xCreate)(void*, const char**, int, Fts5Tokenizer**), 21044 Fts5Tokenizer *pTok 21045 ){ 21046 if( xCreate==fts5TriCreate ){ 21047 TrigramTokenizer *p = (TrigramTokenizer*)pTok; 21048 return p->bFold ? FTS5_PATTERN_LIKE : FTS5_PATTERN_GLOB; 21049 } 21050 return FTS5_PATTERN_NONE; 21051 } 21052 21053 /* 21054 ** Register all built-in tokenizers with FTS5. 21055 */ 21056 static int sqlite3Fts5TokenizerInit(fts5_api *pApi){ 21057 struct BuiltinTokenizer { 21058 const char *zName; 21059 fts5_tokenizer x; 21060 } aBuiltin[] = { 21061 { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}}, 21062 { "ascii", {fts5AsciiCreate, fts5AsciiDelete, fts5AsciiTokenize }}, 21063 { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }}, 21064 { "trigram", {fts5TriCreate, fts5TriDelete, fts5TriTokenize}}, 21065 }; 21066 21067 int rc = SQLITE_OK; /* Return code */ 21068 int i; /* To iterate through builtin functions */ 21069 21070 for(i=0; rc==SQLITE_OK && i<ArraySize(aBuiltin); i++){ 21071 rc = pApi->xCreateTokenizer(pApi, 21072 aBuiltin[i].zName, 21073 (void*)pApi, 21074 &aBuiltin[i].x, 21075 0 21076 ); 21077 } 21078 21079 return rc; 21080 } 21081 21082 #line 1 "fts5_unicode2.c" 21083 /* 21084 ** 2012-05-25 21085 ** 21086 ** The author disclaims copyright to this source code. In place of 21087 ** a legal notice, here is a blessing: 21088 ** 21089 ** May you do good and not evil. 21090 ** May you find forgiveness for yourself and forgive others. 21091 ** May you share freely, never taking more than you give. 
21092 ** 21093 ****************************************************************************** 21094 */ 21095 21096 /* 21097 ** DO NOT EDIT THIS MACHINE GENERATED FILE. 21098 */ 21099 21100 21101 #include <assert.h> 21102 21103 21104 21105 /* 21106 ** If the argument is a codepoint corresponding to a lowercase letter 21107 ** in the ASCII range with a diacritic added, return the codepoint 21108 ** of the ASCII letter only. For example, if passed 235 - "LATIN 21109 ** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER 21110 ** E"). The resuls of passing a codepoint that corresponds to an 21111 ** uppercase letter are undefined. 21112 */ 21113 static int fts5_remove_diacritic(int c, int bComplex){ 21114 unsigned short aDia[] = { 21115 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, 21116 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, 21117 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, 21118 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, 21119 3456, 3696, 3712, 3728, 3744, 3766, 3832, 3896, 21120 3912, 3928, 3944, 3968, 4008, 4040, 4056, 4106, 21121 4138, 4170, 4202, 4234, 4266, 4296, 4312, 4344, 21122 4408, 4424, 4442, 4472, 4488, 4504, 6148, 6198, 21123 6264, 6280, 6360, 6429, 6505, 6529, 61448, 61468, 21124 61512, 61534, 61592, 61610, 61642, 61672, 61688, 61704, 21125 61726, 61784, 61800, 61816, 61836, 61880, 61896, 61914, 21126 61948, 61998, 62062, 62122, 62154, 62184, 62200, 62218, 21127 62252, 62302, 62364, 62410, 62442, 62478, 62536, 62554, 21128 62584, 62604, 62640, 62648, 62656, 62664, 62730, 62766, 21129 62830, 62890, 62924, 62974, 63032, 63050, 63082, 63118, 21130 63182, 63242, 63274, 63310, 63368, 63390, 21131 }; 21132 #define HIBIT ((unsigned char)0x80) 21133 unsigned char aChar[] = { 21134 '\0', 'a', 'c', 'e', 'i', 'n', 21135 'o', 'u', 'y', 'y', 'a', 'c', 21136 'd', 'e', 'e', 'g', 'h', 'i', 21137 'j', 'k', 'l', 'n', 'o', 'r', 21138 's', 't', 'u', 'u', 'w', 'y', 21139 'z', 'o', 'u', 'a', 'i', 'o', 21140 'u', 'u'|HIBIT, 'a'|HIBIT, 'g', 'k', 
'o', 21141 'o'|HIBIT, 'j', 'g', 'n', 'a'|HIBIT, 'a', 21142 'e', 'i', 'o', 'r', 'u', 's', 21143 't', 'h', 'a', 'e', 'o'|HIBIT, 'o', 21144 'o'|HIBIT, 'y', '\0', '\0', '\0', '\0', 21145 '\0', '\0', '\0', '\0', 'a', 'b', 21146 'c'|HIBIT, 'd', 'd', 'e'|HIBIT, 'e', 'e'|HIBIT, 21147 'f', 'g', 'h', 'h', 'i', 'i'|HIBIT, 21148 'k', 'l', 'l'|HIBIT, 'l', 'm', 'n', 21149 'o'|HIBIT, 'p', 'r', 'r'|HIBIT, 'r', 's', 21150 's'|HIBIT, 't', 'u', 'u'|HIBIT, 'v', 'w', 21151 'w', 'x', 'y', 'z', 'h', 't', 21152 'w', 'y', 'a', 'a'|HIBIT, 'a'|HIBIT, 'a'|HIBIT, 21153 'e', 'e'|HIBIT, 'e'|HIBIT, 'i', 'o', 'o'|HIBIT, 21154 'o'|HIBIT, 'o'|HIBIT, 'u', 'u'|HIBIT, 'u'|HIBIT, 'y', 21155 }; 21156 21157 unsigned int key = (((unsigned int)c)<<3) | 0x00000007; 21158 int iRes = 0; 21159 int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; 21160 int iLo = 0; 21161 while( iHi>=iLo ){ 21162 int iTest = (iHi + iLo) / 2; 21163 if( key >= aDia[iTest] ){ 21164 iRes = iTest; 21165 iLo = iTest+1; 21166 }else{ 21167 iHi = iTest-1; 21168 } 21169 } 21170 assert( key>=aDia[iRes] ); 21171 if( bComplex==0 && (aChar[iRes] & 0x80) ) return c; 21172 return (c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : ((int)aChar[iRes] & 0x7F); 21173 } 21174 21175 21176 /* 21177 ** Return true if the argument interpreted as a unicode codepoint 21178 ** is a diacritical modifier character. 21179 */ 21180 static int sqlite3Fts5UnicodeIsdiacritic(int c){ 21181 unsigned int mask0 = 0x08029FDF; 21182 unsigned int mask1 = 0x000361F8; 21183 if( c<768 || c>817 ) return 0; 21184 return (c < 768+32) ? 21185 (mask0 & ((unsigned int)1 << (c-768))) : 21186 (mask1 & ((unsigned int)1 << (c-768-32))); 21187 } 21188 21189 21190 /* 21191 ** Interpret the argument as a unicode codepoint. If the codepoint 21192 ** is an upper case character that has a lower case equivalent, 21193 ** return the codepoint corresponding to the lower case version. 21194 ** Otherwise, return a copy of the argument. 
21195 ** 21196 ** The results are undefined if the value passed to this function 21197 ** is less than zero. 21198 */ 21199 static int sqlite3Fts5UnicodeFold(int c, int eRemoveDiacritic){ 21200 /* Each entry in the following array defines a rule for folding a range 21201 ** of codepoints to lower case. The rule applies to a range of nRange 21202 ** codepoints starting at codepoint iCode. 21203 ** 21204 ** If the least significant bit in flags is clear, then the rule applies 21205 ** to all nRange codepoints (i.e. all nRange codepoints are upper case and 21206 ** need to be folded). Or, if it is set, then the rule only applies to 21207 ** every second codepoint in the range, starting with codepoint C. 21208 ** 21209 ** The 7 most significant bits in flags are an index into the aiOff[] 21210 ** array. If a specific codepoint C does require folding, then its lower 21211 ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). 21212 ** 21213 ** The contents of this array are generated by parsing the CaseFolding.txt 21214 ** file distributed as part of the "Unicode Character Database". See 21215 ** http://www.unicode.org for details. 
21216 */ 21217 static const struct TableEntry { 21218 unsigned short iCode; 21219 unsigned char flags; 21220 unsigned char nRange; 21221 } aEntry[] = { 21222 {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, 21223 {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, 21224 {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, 21225 {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, 21226 {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, 21227 {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, 21228 {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, 21229 {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, 21230 {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, 21231 {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, 21232 {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, 21233 {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, 21234 {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, 21235 {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, 21236 {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, 21237 {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, 21238 {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, 21239 {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, 21240 {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, 21241 {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, 21242 {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, 21243 {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, 21244 {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, 21245 {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, 21246 {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, 21247 {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, 21248 {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, 21249 {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, 21250 {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, 21251 {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, 21252 {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, 21253 {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, 21254 {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, 21255 {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, 21256 {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, 21257 {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, 21258 {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, 21259 {8088, 150, 8}, {8104, 150, 8}, 
{8120, 150, 2}, 21260 {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, 21261 {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, 21262 {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, 21263 {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, 21264 {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, 21265 {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, 21266 {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, 21267 {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, 21268 {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, 21269 {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, 21270 {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, 21271 {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, 21272 {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, 21273 {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, 21274 {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, 21275 {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, 21276 {65313, 14, 26}, 21277 }; 21278 static const unsigned short aiOff[] = { 21279 1, 2, 8, 15, 16, 26, 28, 32, 21280 37, 38, 40, 48, 63, 64, 69, 71, 21281 79, 80, 116, 202, 203, 205, 206, 207, 21282 209, 210, 211, 213, 214, 217, 218, 219, 21283 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, 21284 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, 21285 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, 21286 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, 21287 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, 21288 65514, 65521, 65527, 65528, 65529, 21289 }; 21290 21291 int ret = c; 21292 21293 assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); 21294 21295 if( c<128 ){ 21296 if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); 21297 }else if( c<65536 ){ 21298 const struct TableEntry *p; 21299 int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; 21300 int iLo = 0; 21301 int iRes = -1; 21302 21303 assert( c>aEntry[0].iCode ); 21304 while( iHi>=iLo ){ 21305 int iTest = (iHi + iLo) / 2; 21306 int cmp = (c - aEntry[iTest].iCode); 21307 if( cmp>=0 ){ 21308 iRes = iTest; 21309 iLo = 
iTest+1; 21310 }else{ 21311 iHi = iTest-1; 21312 } 21313 } 21314 21315 assert( iRes>=0 && c>=aEntry[iRes].iCode ); 21316 p = &aEntry[iRes]; 21317 if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ 21318 ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; 21319 assert( ret>0 ); 21320 } 21321 21322 if( eRemoveDiacritic ){ 21323 ret = fts5_remove_diacritic(ret, eRemoveDiacritic==2); 21324 } 21325 } 21326 21327 else if( c>=66560 && c<66600 ){ 21328 ret = c + 40; 21329 } 21330 21331 return ret; 21332 } 21333 21334 21335 static int sqlite3Fts5UnicodeCatParse(const char *zCat, u8 *aArray){ 21336 aArray[0] = 1; 21337 switch( zCat[0] ){ 21338 case 'C': 21339 switch( zCat[1] ){ 21340 case 'c': aArray[1] = 1; break; 21341 case 'f': aArray[2] = 1; break; 21342 case 'n': aArray[3] = 1; break; 21343 case 's': aArray[4] = 1; break; 21344 case 'o': aArray[31] = 1; break; 21345 case '*': 21346 aArray[1] = 1; 21347 aArray[2] = 1; 21348 aArray[3] = 1; 21349 aArray[4] = 1; 21350 aArray[31] = 1; 21351 break; 21352 default: return 1; } 21353 break; 21354 21355 case 'L': 21356 switch( zCat[1] ){ 21357 case 'l': aArray[5] = 1; break; 21358 case 'm': aArray[6] = 1; break; 21359 case 'o': aArray[7] = 1; break; 21360 case 't': aArray[8] = 1; break; 21361 case 'u': aArray[9] = 1; break; 21362 case 'C': aArray[30] = 1; break; 21363 case '*': 21364 aArray[5] = 1; 21365 aArray[6] = 1; 21366 aArray[7] = 1; 21367 aArray[8] = 1; 21368 aArray[9] = 1; 21369 aArray[30] = 1; 21370 break; 21371 default: return 1; } 21372 break; 21373 21374 case 'M': 21375 switch( zCat[1] ){ 21376 case 'c': aArray[10] = 1; break; 21377 case 'e': aArray[11] = 1; break; 21378 case 'n': aArray[12] = 1; break; 21379 case '*': 21380 aArray[10] = 1; 21381 aArray[11] = 1; 21382 aArray[12] = 1; 21383 break; 21384 default: return 1; } 21385 break; 21386 21387 case 'N': 21388 switch( zCat[1] ){ 21389 case 'd': aArray[13] = 1; break; 21390 case 'l': aArray[14] = 1; break; 21391 case 'o': aArray[15] = 1; break; 
21392 case '*': 21393 aArray[13] = 1; 21394 aArray[14] = 1; 21395 aArray[15] = 1; 21396 break; 21397 default: return 1; } 21398 break; 21399 21400 case 'P': 21401 switch( zCat[1] ){ 21402 case 'c': aArray[16] = 1; break; 21403 case 'd': aArray[17] = 1; break; 21404 case 'e': aArray[18] = 1; break; 21405 case 'f': aArray[19] = 1; break; 21406 case 'i': aArray[20] = 1; break; 21407 case 'o': aArray[21] = 1; break; 21408 case 's': aArray[22] = 1; break; 21409 case '*': 21410 aArray[16] = 1; 21411 aArray[17] = 1; 21412 aArray[18] = 1; 21413 aArray[19] = 1; 21414 aArray[20] = 1; 21415 aArray[21] = 1; 21416 aArray[22] = 1; 21417 break; 21418 default: return 1; } 21419 break; 21420 21421 case 'S': 21422 switch( zCat[1] ){ 21423 case 'c': aArray[23] = 1; break; 21424 case 'k': aArray[24] = 1; break; 21425 case 'm': aArray[25] = 1; break; 21426 case 'o': aArray[26] = 1; break; 21427 case '*': 21428 aArray[23] = 1; 21429 aArray[24] = 1; 21430 aArray[25] = 1; 21431 aArray[26] = 1; 21432 break; 21433 default: return 1; } 21434 break; 21435 21436 case 'Z': 21437 switch( zCat[1] ){ 21438 case 'l': aArray[27] = 1; break; 21439 case 'p': aArray[28] = 1; break; 21440 case 's': aArray[29] = 1; break; 21441 case '*': 21442 aArray[27] = 1; 21443 aArray[28] = 1; 21444 aArray[29] = 1; 21445 break; 21446 default: return 1; } 21447 break; 21448 21449 } 21450 return 0; 21451 } 21452 21453 static u16 aFts5UnicodeBlock[] = { 21454 0, 1471, 1753, 1760, 1760, 1760, 1760, 1760, 1760, 1760, 21455 1760, 1760, 1760, 1760, 1760, 1763, 1765, 21456 }; 21457 static u16 aFts5UnicodeMap[] = { 21458 0, 32, 33, 36, 37, 40, 41, 42, 43, 44, 21459 45, 46, 48, 58, 60, 63, 65, 91, 92, 93, 21460 94, 95, 96, 97, 123, 124, 125, 126, 127, 160, 21461 161, 162, 166, 167, 168, 169, 170, 171, 172, 173, 21462 174, 175, 176, 177, 178, 180, 181, 182, 184, 185, 21463 186, 187, 188, 191, 192, 215, 216, 223, 247, 248, 21464 256, 312, 313, 329, 330, 377, 383, 385, 387, 388, 21465 391, 394, 396, 398, 402, 403, 405, 406, 409, 
412, 21466 414, 415, 417, 418, 423, 427, 428, 431, 434, 436, 21467 437, 440, 442, 443, 444, 446, 448, 452, 453, 454, 21468 455, 456, 457, 458, 459, 460, 461, 477, 478, 496, 21469 497, 498, 499, 500, 503, 505, 506, 564, 570, 572, 21470 573, 575, 577, 580, 583, 584, 592, 660, 661, 688, 21471 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, 21472 880, 884, 885, 886, 890, 891, 894, 900, 902, 903, 21473 904, 908, 910, 912, 913, 931, 940, 975, 977, 978, 21474 981, 984, 1008, 1012, 1014, 1015, 1018, 1020, 1021, 1072, 21475 1120, 1154, 1155, 1160, 1162, 1217, 1231, 1232, 1329, 1369, 21476 1370, 1377, 1417, 1418, 1423, 1425, 1470, 1471, 1472, 1473, 21477 1475, 1476, 1478, 1479, 1488, 1520, 1523, 1536, 1542, 1545, 21478 1547, 1548, 1550, 1552, 1563, 1566, 1568, 1600, 1601, 1611, 21479 1632, 1642, 1646, 1648, 1649, 1748, 1749, 1750, 1757, 1758, 21480 1759, 1765, 1767, 1769, 1770, 1774, 1776, 1786, 1789, 1791, 21481 1792, 1807, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1984, 21482 1994, 2027, 2036, 2038, 2039, 2042, 2048, 2070, 2074, 2075, 21483 2084, 2085, 2088, 2089, 2096, 2112, 2137, 2142, 2208, 2210, 21484 2276, 2304, 2307, 2308, 2362, 2363, 2364, 2365, 2366, 2369, 21485 2377, 2381, 2382, 2384, 2385, 2392, 2402, 2404, 2406, 2416, 21486 2417, 2418, 2425, 2433, 2434, 2437, 2447, 2451, 2474, 2482, 21487 2486, 2492, 2493, 2494, 2497, 2503, 2507, 2509, 2510, 2519, 21488 2524, 2527, 2530, 2534, 2544, 2546, 2548, 2554, 2555, 2561, 21489 2563, 2565, 2575, 2579, 2602, 2610, 2613, 2616, 2620, 2622, 21490 2625, 2631, 2635, 2641, 2649, 2654, 2662, 2672, 2674, 2677, 21491 2689, 2691, 2693, 2703, 2707, 2730, 2738, 2741, 2748, 2749, 21492 2750, 2753, 2759, 2761, 2763, 2765, 2768, 2784, 2786, 2790, 21493 2800, 2801, 2817, 2818, 2821, 2831, 2835, 2858, 2866, 2869, 21494 2876, 2877, 2878, 2879, 2880, 2881, 2887, 2891, 2893, 2902, 21495 2903, 2908, 2911, 2914, 2918, 2928, 2929, 2930, 2946, 2947, 21496 2949, 2958, 2962, 2969, 2972, 2974, 2979, 2984, 2990, 3006, 21497 3008, 3009, 3014, 3018, 
3021, 3024, 3031, 3046, 3056, 3059, 21498 3065, 3066, 3073, 3077, 3086, 3090, 3114, 3125, 3133, 3134, 21499 3137, 3142, 3146, 3157, 3160, 3168, 3170, 3174, 3192, 3199, 21500 3202, 3205, 3214, 3218, 3242, 3253, 3260, 3261, 3262, 3263, 21501 3264, 3270, 3271, 3274, 3276, 3285, 3294, 3296, 3298, 3302, 21502 3313, 3330, 3333, 3342, 3346, 3389, 3390, 3393, 3398, 3402, 21503 3405, 3406, 3415, 3424, 3426, 3430, 3440, 3449, 3450, 3458, 21504 3461, 3482, 3507, 3517, 3520, 3530, 3535, 3538, 3542, 3544, 21505 3570, 3572, 3585, 3633, 3634, 3636, 3647, 3648, 3654, 3655, 21506 3663, 3664, 3674, 3713, 3716, 3719, 3722, 3725, 3732, 3737, 21507 3745, 3749, 3751, 3754, 3757, 3761, 3762, 3764, 3771, 3773, 21508 3776, 3782, 3784, 3792, 3804, 3840, 3841, 3844, 3859, 3860, 21509 3861, 3864, 3866, 3872, 3882, 3892, 3893, 3894, 3895, 3896, 21510 3897, 3898, 3899, 3900, 3901, 3902, 3904, 3913, 3953, 3967, 21511 3968, 3973, 3974, 3976, 3981, 3993, 4030, 4038, 4039, 4046, 21512 4048, 4053, 4057, 4096, 4139, 4141, 4145, 4146, 4152, 4153, 21513 4155, 4157, 4159, 4160, 4170, 4176, 4182, 4184, 4186, 4190, 21514 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4227, 4229, 21515 4231, 4237, 4238, 4239, 4240, 4250, 4253, 4254, 4256, 4295, 21516 4301, 4304, 4347, 4348, 4349, 4682, 4688, 4696, 4698, 4704, 21517 4746, 4752, 4786, 4792, 4800, 4802, 4808, 4824, 4882, 4888, 21518 4957, 4960, 4969, 4992, 5008, 5024, 5120, 5121, 5741, 5743, 21519 5760, 5761, 5787, 5788, 5792, 5867, 5870, 5888, 5902, 5906, 21520 5920, 5938, 5941, 5952, 5970, 5984, 5998, 6002, 6016, 6068, 21521 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6103, 6104, 6107, 21522 6108, 6109, 6112, 6128, 6144, 6150, 6151, 6155, 6158, 6160, 21523 6176, 6211, 6212, 6272, 6313, 6314, 6320, 6400, 6432, 6435, 21524 6439, 6441, 6448, 6450, 6451, 6457, 6464, 6468, 6470, 6480, 21525 6512, 6528, 6576, 6593, 6600, 6608, 6618, 6622, 6656, 6679, 21526 6681, 6686, 6688, 6741, 6742, 6743, 6744, 6752, 6753, 6754, 21527 6755, 6757, 6765, 6771, 6783, 6784, 6800, 
6816, 6823, 6824, 21528 6912, 6916, 6917, 6964, 6965, 6966, 6971, 6972, 6973, 6978, 21529 6979, 6981, 6992, 7002, 7009, 7019, 7028, 7040, 7042, 7043, 21530 7073, 7074, 7078, 7080, 7082, 7083, 7084, 7086, 7088, 7098, 21531 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7164, 7168, 21532 7204, 7212, 7220, 7222, 7227, 7232, 7245, 7248, 7258, 7288, 21533 7294, 7360, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, 21534 7410, 7412, 7413, 7424, 7468, 7531, 7544, 7545, 7579, 7616, 21535 7676, 7680, 7830, 7838, 7936, 7944, 7952, 7960, 7968, 7976, 21536 7984, 7992, 8000, 8008, 8016, 8025, 8027, 8029, 8031, 8033, 21537 8040, 8048, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8118, 21538 8120, 8124, 8125, 8126, 8127, 8130, 8134, 8136, 8140, 8141, 21539 8144, 8150, 8152, 8157, 8160, 8168, 8173, 8178, 8182, 8184, 21540 8188, 8189, 8192, 8203, 8208, 8214, 8216, 8217, 8218, 8219, 21541 8221, 8222, 8223, 8224, 8232, 8233, 8234, 8239, 8240, 8249, 21542 8250, 8251, 8255, 8257, 8260, 8261, 8262, 8263, 8274, 8275, 21543 8276, 8277, 8287, 8288, 8298, 8304, 8305, 8308, 8314, 8317, 21544 8318, 8319, 8320, 8330, 8333, 8334, 8336, 8352, 8400, 8413, 21545 8417, 8418, 8421, 8448, 8450, 8451, 8455, 8456, 8458, 8459, 21546 8462, 8464, 8467, 8468, 8469, 8470, 8472, 8473, 8478, 8484, 21547 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8496, 8500, 21548 8501, 8505, 8506, 8508, 8510, 8512, 8517, 8519, 8522, 8523, 21549 8524, 8526, 8527, 8528, 8544, 8579, 8581, 8585, 8592, 8597, 21550 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 21551 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, 21552 8992, 8994, 9001, 9002, 9003, 9084, 9085, 9115, 9140, 9180, 21553 9186, 9216, 9280, 9312, 9372, 9450, 9472, 9655, 9656, 9665, 21554 9666, 9720, 9728, 9839, 9840, 9985, 10088, 10089, 10090, 10091, 21555 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, 21556 10102, 10132, 10176, 10181, 10182, 10183, 10214, 10215, 10216, 10217, 21557 10218, 10219, 10220, 10221, 10222, 
10223, 10224, 10240, 10496, 10627, 21558 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 21559 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 21560 10648, 10649, 10712, 10713, 10714, 10715, 10716, 10748, 10749, 10750, 21561 11008, 11056, 11077, 11079, 11088, 11264, 11312, 11360, 11363, 11365, 21562 11367, 11374, 11377, 11378, 11380, 11381, 11383, 11388, 11390, 11393, 21563 11394, 11492, 11493, 11499, 11503, 11506, 11513, 11517, 11518, 11520, 21564 11559, 11565, 11568, 11631, 11632, 11647, 11648, 11680, 11688, 11696, 21565 11704, 11712, 11720, 11728, 11736, 11744, 11776, 11778, 11779, 11780, 21566 11781, 11782, 11785, 11786, 11787, 11788, 11789, 11790, 11799, 11800, 21567 11802, 11803, 11804, 11805, 11806, 11808, 11809, 11810, 11811, 11812, 21568 11813, 11814, 11815, 11816, 11817, 11818, 11823, 11824, 11834, 11904, 21569 11931, 12032, 12272, 12288, 12289, 12292, 12293, 12294, 12295, 12296, 21570 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306, 21571 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12317, 21572 12318, 12320, 12321, 12330, 12334, 12336, 12337, 12342, 12344, 12347, 21573 12348, 12349, 12350, 12353, 12441, 12443, 12445, 12447, 12448, 12449, 21574 12539, 12540, 12543, 12549, 12593, 12688, 12690, 12694, 12704, 12736, 21575 12784, 12800, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 21576 12977, 12992, 13056, 13312, 19893, 19904, 19968, 40908, 40960, 40981, 21577 40982, 42128, 42192, 42232, 42238, 42240, 42508, 42509, 42512, 42528, 21578 42538, 42560, 42606, 42607, 42608, 42611, 42612, 42622, 42623, 42624, 21579 42655, 42656, 42726, 42736, 42738, 42752, 42775, 42784, 42786, 42800, 21580 42802, 42864, 42865, 42873, 42878, 42888, 42889, 42891, 42896, 42912, 21581 43000, 43002, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 21582 43045, 43047, 43048, 43056, 43062, 43064, 43065, 43072, 43124, 43136, 21583 43138, 43188, 43204, 43214, 43216, 43232, 43250, 43256, 
43259, 43264, 21584 43274, 43302, 43310, 43312, 43335, 43346, 43359, 43360, 43392, 43395, 21585 43396, 43443, 43444, 43446, 43450, 43452, 43453, 43457, 43471, 43472, 21586 43486, 43520, 43561, 43567, 43569, 43571, 43573, 43584, 43587, 43588, 21587 43596, 43597, 43600, 43612, 43616, 43632, 43633, 43639, 43642, 43643, 21588 43648, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 21589 43714, 43739, 43741, 43742, 43744, 43755, 43756, 43758, 43760, 43762, 21590 43763, 43765, 43766, 43777, 43785, 43793, 43808, 43816, 43968, 44003, 21591 44005, 44006, 44008, 44009, 44011, 44012, 44013, 44016, 44032, 55203, 21592 55216, 55243, 55296, 56191, 56319, 57343, 57344, 63743, 63744, 64112, 21593 64256, 64275, 64285, 64286, 64287, 64297, 64298, 64312, 64318, 64320, 21594 64323, 64326, 64434, 64467, 64830, 64831, 64848, 64914, 65008, 65020, 21595 65021, 65024, 65040, 65047, 65048, 65049, 65056, 65072, 65073, 65075, 21596 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 21597 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65095, 65096, 65097, 21598 65101, 65104, 65108, 65112, 65113, 65114, 65115, 65116, 65117, 65118, 21599 65119, 65122, 65123, 65124, 65128, 65129, 65130, 65136, 65142, 65279, 21600 65281, 65284, 65285, 65288, 65289, 65290, 65291, 65292, 65293, 65294, 21601 65296, 65306, 65308, 65311, 65313, 65339, 65340, 65341, 65342, 65343, 21602 65344, 65345, 65371, 65372, 65373, 65374, 65375, 65376, 65377, 65378, 21603 65379, 65380, 65382, 65392, 65393, 65438, 65440, 65474, 65482, 65490, 21604 65498, 65504, 65506, 65507, 65508, 65509, 65512, 65513, 65517, 65529, 21605 65532, 0, 13, 40, 60, 63, 80, 128, 256, 263, 21606 311, 320, 373, 377, 394, 400, 464, 509, 640, 672, 21607 768, 800, 816, 833, 834, 842, 896, 927, 928, 968, 21608 976, 977, 1024, 1064, 1104, 1184, 2048, 2056, 2058, 2103, 21609 2108, 2111, 2135, 2136, 2304, 2326, 2335, 2336, 2367, 2432, 21610 2494, 2560, 2561, 2565, 2572, 2576, 2581, 2585, 2616, 2623, 21611 2624, 2640, 2656, 2685, 
2687, 2816, 2873, 2880, 2904, 2912, 21612 2936, 3072, 3680, 4096, 4097, 4098, 4099, 4152, 4167, 4178, 21613 4198, 4224, 4226, 4227, 4272, 4275, 4279, 4281, 4283, 4285, 21614 4286, 4304, 4336, 4352, 4355, 4391, 4396, 4397, 4406, 4416, 21615 4480, 4482, 4483, 4531, 4534, 4543, 4545, 4549, 4560, 5760, 21616 5803, 5804, 5805, 5806, 5808, 5814, 5815, 5824, 8192, 9216, 21617 9328, 12288, 26624, 28416, 28496, 28497, 28559, 28563, 45056, 53248, 21618 53504, 53545, 53605, 53607, 53610, 53613, 53619, 53627, 53635, 53637, 21619 53644, 53674, 53678, 53760, 53826, 53829, 54016, 54112, 54272, 54298, 21620 54324, 54350, 54358, 54376, 54402, 54428, 54430, 54434, 54437, 54441, 21621 54446, 54454, 54459, 54461, 54469, 54480, 54506, 54532, 54535, 54541, 21622 54550, 54558, 54584, 54587, 54592, 54598, 54602, 54610, 54636, 54662, 21623 54688, 54714, 54740, 54766, 54792, 54818, 54844, 54870, 54896, 54922, 21624 54952, 54977, 54978, 55003, 55004, 55010, 55035, 55036, 55061, 55062, 21625 55068, 55093, 55094, 55119, 55120, 55126, 55151, 55152, 55177, 55178, 21626 55184, 55209, 55210, 55235, 55236, 55242, 55246, 60928, 60933, 60961, 21627 60964, 60967, 60969, 60980, 60985, 60987, 60994, 60999, 61001, 61003, 21628 61005, 61009, 61012, 61015, 61017, 61019, 61021, 61023, 61025, 61028, 21629 61031, 61036, 61044, 61049, 61054, 61056, 61067, 61089, 61093, 61099, 21630 61168, 61440, 61488, 61600, 61617, 61633, 61649, 61696, 61712, 61744, 21631 61808, 61926, 61968, 62016, 62032, 62208, 62256, 62263, 62336, 62368, 21632 62406, 62432, 62464, 62528, 62530, 62713, 62720, 62784, 62800, 62971, 21633 63045, 63104, 63232, 0, 42710, 42752, 46900, 46912, 47133, 63488, 21634 1, 32, 256, 0, 65533, 21635 }; 21636 static u16 aFts5UnicodeData[] = { 21637 1025, 61, 117, 55, 117, 54, 50, 53, 57, 53, 21638 49, 85, 333, 85, 121, 85, 841, 54, 53, 50, 21639 56, 48, 56, 837, 54, 57, 50, 57, 1057, 61, 21640 53, 151, 58, 53, 56, 58, 39, 52, 57, 34, 21641 58, 56, 58, 57, 79, 56, 37, 85, 56, 47, 21642 39, 51, 111, 53, 745, 
57, 233, 773, 57, 261, 21643 1822, 37, 542, 37, 1534, 222, 69, 73, 37, 126, 21644 126, 73, 69, 137, 37, 73, 37, 105, 101, 73, 21645 37, 73, 37, 190, 158, 37, 126, 126, 73, 37, 21646 126, 94, 37, 39, 94, 69, 135, 41, 40, 37, 21647 41, 40, 37, 41, 40, 37, 542, 37, 606, 37, 21648 41, 40, 37, 126, 73, 37, 1886, 197, 73, 37, 21649 73, 69, 126, 105, 37, 286, 2181, 39, 869, 582, 21650 152, 390, 472, 166, 248, 38, 56, 38, 568, 3596, 21651 158, 38, 56, 94, 38, 101, 53, 88, 41, 53, 21652 105, 41, 73, 37, 553, 297, 1125, 94, 37, 105, 21653 101, 798, 133, 94, 57, 126, 94, 37, 1641, 1541, 21654 1118, 58, 172, 75, 1790, 478, 37, 2846, 1225, 38, 21655 213, 1253, 53, 49, 55, 1452, 49, 44, 53, 76, 21656 53, 76, 53, 44, 871, 103, 85, 162, 121, 85, 21657 55, 85, 90, 364, 53, 85, 1031, 38, 327, 684, 21658 333, 149, 71, 44, 3175, 53, 39, 236, 34, 58, 21659 204, 70, 76, 58, 140, 71, 333, 103, 90, 39, 21660 469, 34, 39, 44, 967, 876, 2855, 364, 39, 333, 21661 1063, 300, 70, 58, 117, 38, 711, 140, 38, 300, 21662 38, 108, 38, 172, 501, 807, 108, 53, 39, 359, 21663 876, 108, 42, 1735, 44, 42, 44, 39, 106, 268, 21664 138, 44, 74, 39, 236, 327, 76, 85, 333, 53, 21665 38, 199, 231, 44, 74, 263, 71, 711, 231, 39, 21666 135, 44, 39, 106, 140, 74, 74, 44, 39, 42, 21667 71, 103, 76, 333, 71, 87, 207, 58, 55, 76, 21668 42, 199, 71, 711, 231, 71, 71, 71, 44, 106, 21669 76, 76, 108, 44, 135, 39, 333, 76, 103, 44, 21670 76, 42, 295, 103, 711, 231, 71, 167, 44, 39, 21671 106, 172, 76, 42, 74, 44, 39, 71, 76, 333, 21672 53, 55, 44, 74, 263, 71, 711, 231, 71, 167, 21673 44, 39, 42, 44, 42, 140, 74, 74, 44, 44, 21674 42, 71, 103, 76, 333, 58, 39, 207, 44, 39, 21675 199, 103, 135, 71, 39, 71, 71, 103, 391, 74, 21676 44, 74, 106, 106, 44, 39, 42, 333, 111, 218, 21677 55, 58, 106, 263, 103, 743, 327, 167, 39, 108, 21678 138, 108, 140, 76, 71, 71, 76, 333, 239, 58, 21679 74, 263, 103, 743, 327, 167, 44, 39, 42, 44, 21680 170, 44, 74, 74, 76, 74, 39, 71, 76, 333, 21681 71, 74, 263, 103, 1319, 39, 106, 140, 
106, 106, 21682 44, 39, 42, 71, 76, 333, 207, 58, 199, 74, 21683 583, 775, 295, 39, 231, 44, 106, 108, 44, 266, 21684 74, 53, 1543, 44, 71, 236, 55, 199, 38, 268, 21685 53, 333, 85, 71, 39, 71, 39, 39, 135, 231, 21686 103, 39, 39, 71, 135, 44, 71, 204, 76, 39, 21687 167, 38, 204, 333, 135, 39, 122, 501, 58, 53, 21688 122, 76, 218, 333, 335, 58, 44, 58, 44, 58, 21689 44, 54, 50, 54, 50, 74, 263, 1159, 460, 42, 21690 172, 53, 76, 167, 364, 1164, 282, 44, 218, 90, 21691 181, 154, 85, 1383, 74, 140, 42, 204, 42, 76, 21692 74, 76, 39, 333, 213, 199, 74, 76, 135, 108, 21693 39, 106, 71, 234, 103, 140, 423, 44, 74, 76, 21694 202, 44, 39, 42, 333, 106, 44, 90, 1225, 41, 21695 41, 1383, 53, 38, 10631, 135, 231, 39, 135, 1319, 21696 135, 1063, 135, 231, 39, 135, 487, 1831, 135, 2151, 21697 108, 309, 655, 519, 346, 2727, 49, 19847, 85, 551, 21698 61, 839, 54, 50, 2407, 117, 110, 423, 135, 108, 21699 583, 108, 85, 583, 76, 423, 103, 76, 1671, 76, 21700 42, 236, 266, 44, 74, 364, 117, 38, 117, 55, 21701 39, 44, 333, 335, 213, 49, 149, 108, 61, 333, 21702 1127, 38, 1671, 1319, 44, 39, 2247, 935, 108, 138, 21703 76, 106, 74, 44, 202, 108, 58, 85, 333, 967, 21704 167, 1415, 554, 231, 74, 333, 47, 1114, 743, 76, 21705 106, 85, 1703, 42, 44, 42, 236, 44, 42, 44, 21706 74, 268, 202, 332, 44, 333, 333, 245, 38, 213, 21707 140, 42, 1511, 44, 42, 172, 42, 44, 170, 44, 21708 74, 231, 333, 245, 346, 300, 314, 76, 42, 967, 21709 42, 140, 74, 76, 42, 44, 74, 71, 333, 1415, 21710 44, 42, 76, 106, 44, 42, 108, 74, 149, 1159, 21711 266, 268, 74, 76, 181, 333, 103, 333, 967, 198, 21712 85, 277, 108, 53, 428, 42, 236, 135, 44, 135, 21713 74, 44, 71, 1413, 2022, 421, 38, 1093, 1190, 1260, 21714 140, 4830, 261, 3166, 261, 265, 197, 201, 261, 265, 21715 261, 265, 197, 201, 261, 41, 41, 41, 94, 229, 21716 265, 453, 261, 264, 261, 264, 261, 264, 165, 69, 21717 137, 40, 56, 37, 120, 101, 69, 137, 40, 120, 21718 133, 69, 137, 120, 261, 169, 120, 101, 69, 137, 21719 40, 88, 381, 162, 209, 85, 52, 51, 
54, 84, 21720 51, 54, 52, 277, 59, 60, 162, 61, 309, 52, 21721 51, 149, 80, 117, 57, 54, 50, 373, 57, 53, 21722 48, 341, 61, 162, 194, 47, 38, 207, 121, 54, 21723 50, 38, 335, 121, 54, 50, 422, 855, 428, 139, 21724 44, 107, 396, 90, 41, 154, 41, 90, 37, 105, 21725 69, 105, 37, 58, 41, 90, 57, 169, 218, 41, 21726 58, 41, 58, 41, 58, 137, 58, 37, 137, 37, 21727 135, 37, 90, 69, 73, 185, 94, 101, 58, 57, 21728 90, 37, 58, 527, 1134, 94, 142, 47, 185, 186, 21729 89, 154, 57, 90, 57, 90, 57, 250, 57, 1018, 21730 89, 90, 57, 58, 57, 1018, 8601, 282, 153, 666, 21731 89, 250, 54, 50, 2618, 57, 986, 825, 1306, 217, 21732 602, 1274, 378, 1935, 2522, 719, 5882, 57, 314, 57, 21733 1754, 281, 3578, 57, 4634, 3322, 54, 50, 54, 50, 21734 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, 21735 975, 1434, 185, 54, 50, 1017, 54, 50, 54, 50, 21736 54, 50, 54, 50, 54, 50, 537, 8218, 4217, 54, 21737 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, 21738 50, 54, 50, 54, 50, 54, 50, 54, 50, 54, 21739 50, 2041, 54, 50, 54, 50, 1049, 54, 50, 8281, 21740 1562, 697, 90, 217, 346, 1513, 1509, 126, 73, 69, 21741 254, 105, 37, 94, 37, 94, 165, 70, 105, 37, 21742 3166, 37, 218, 158, 108, 94, 149, 47, 85, 1221, 21743 37, 37, 1799, 38, 53, 44, 743, 231, 231, 231, 21744 231, 231, 231, 231, 231, 1036, 85, 52, 51, 52, 21745 51, 117, 52, 51, 53, 52, 51, 309, 49, 85, 21746 49, 53, 52, 51, 85, 52, 51, 54, 50, 54, 21747 50, 54, 50, 54, 50, 181, 38, 341, 81, 858, 21748 2874, 6874, 410, 61, 117, 58, 38, 39, 46, 54, 21749 50, 54, 50, 54, 50, 54, 50, 54, 50, 90, 21750 54, 50, 54, 50, 54, 50, 54, 50, 49, 54, 21751 82, 58, 302, 140, 74, 49, 166, 90, 110, 38, 21752 39, 53, 90, 2759, 76, 88, 70, 39, 49, 2887, 21753 53, 102, 39, 1319, 3015, 90, 143, 346, 871, 1178, 21754 519, 1018, 335, 986, 271, 58, 495, 1050, 335, 1274, 21755 495, 2042, 8218, 39, 39, 2074, 39, 39, 679, 38, 21756 36583, 1786, 1287, 198, 85, 8583, 38, 117, 519, 333, 21757 71, 1502, 39, 44, 107, 53, 332, 53, 38, 798, 21758 44, 2247, 334, 76, 213, 760, 294, 88, 478, 
69, 21759 2014, 38, 261, 190, 350, 38, 88, 158, 158, 382, 21760 70, 37, 231, 44, 103, 44, 135, 44, 743, 74, 21761 76, 42, 154, 207, 90, 55, 58, 1671, 149, 74, 21762 1607, 522, 44, 85, 333, 588, 199, 117, 39, 333, 21763 903, 268, 85, 743, 364, 74, 53, 935, 108, 42, 21764 1511, 44, 74, 140, 74, 44, 138, 437, 38, 333, 21765 85, 1319, 204, 74, 76, 74, 76, 103, 44, 263, 21766 44, 42, 333, 149, 519, 38, 199, 122, 39, 42, 21767 1543, 44, 39, 108, 71, 76, 167, 76, 39, 44, 21768 39, 71, 38, 85, 359, 42, 76, 74, 85, 39, 21769 70, 42, 44, 199, 199, 199, 231, 231, 1127, 74, 21770 44, 74, 44, 74, 53, 42, 44, 333, 39, 39, 21771 743, 1575, 36, 68, 68, 36, 63, 63, 11719, 3399, 21772 229, 165, 39, 44, 327, 57, 423, 167, 39, 71, 21773 71, 3463, 536, 11623, 54, 50, 2055, 1735, 391, 55, 21774 58, 524, 245, 54, 50, 53, 236, 53, 81, 80, 21775 54, 50, 54, 50, 54, 50, 54, 50, 54, 50, 21776 54, 50, 54, 50, 54, 50, 85, 54, 50, 149, 21777 112, 117, 149, 49, 54, 50, 54, 50, 54, 50, 21778 117, 57, 49, 121, 53, 55, 85, 167, 4327, 34, 21779 117, 55, 117, 54, 50, 53, 57, 53, 49, 85, 21780 333, 85, 121, 85, 841, 54, 53, 50, 56, 48, 21781 56, 837, 54, 57, 50, 57, 54, 50, 53, 54, 21782 50, 85, 327, 38, 1447, 70, 999, 199, 199, 199, 21783 103, 87, 57, 56, 58, 87, 58, 153, 90, 98, 21784 90, 391, 839, 615, 71, 487, 455, 3943, 117, 1455, 21785 314, 1710, 143, 570, 47, 410, 1466, 44, 935, 1575, 21786 999, 143, 551, 46, 263, 46, 967, 53, 1159, 263, 21787 53, 174, 1289, 1285, 2503, 333, 199, 39, 1415, 71, 21788 39, 743, 53, 271, 711, 207, 53, 839, 53, 1799, 21789 71, 39, 108, 76, 140, 135, 103, 871, 108, 44, 21790 271, 309, 935, 79, 53, 1735, 245, 711, 271, 615, 21791 271, 2343, 1007, 42, 44, 42, 1703, 492, 245, 655, 21792 333, 76, 42, 1447, 106, 140, 74, 76, 85, 34, 21793 149, 807, 333, 108, 1159, 172, 42, 268, 333, 149, 21794 76, 42, 1543, 106, 300, 74, 135, 149, 333, 1383, 21795 44, 42, 44, 74, 204, 42, 44, 333, 28135, 3182, 21796 149, 34279, 18215, 2215, 39, 1482, 140, 422, 71, 7898, 21797 1274, 1946, 
74, 108, 122, 202, 258, 268, 90, 236, 21798 986, 140, 1562, 2138, 108, 58, 2810, 591, 841, 837, 21799 841, 229, 581, 841, 837, 41, 73, 41, 73, 137, 21800 265, 133, 37, 229, 357, 841, 837, 73, 137, 265, 21801 233, 837, 73, 137, 169, 41, 233, 837, 841, 837, 21802 841, 837, 841, 837, 841, 837, 841, 837, 841, 901, 21803 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, 21804 809, 57, 805, 57, 197, 809, 57, 805, 57, 197, 21805 809, 57, 805, 57, 197, 94, 1613, 135, 871, 71, 21806 39, 39, 327, 135, 39, 39, 39, 39, 39, 39, 21807 103, 71, 39, 39, 39, 39, 39, 39, 71, 39, 21808 135, 231, 135, 135, 39, 327, 551, 103, 167, 551, 21809 89, 1434, 3226, 506, 474, 506, 506, 367, 1018, 1946, 21810 1402, 954, 1402, 314, 90, 1082, 218, 2266, 666, 1210, 21811 186, 570, 2042, 58, 5850, 154, 2010, 154, 794, 2266, 21812 378, 2266, 3738, 39, 39, 39, 39, 39, 39, 17351, 21813 34, 3074, 7692, 63, 63, 21814 }; 21815 21816 static int sqlite3Fts5UnicodeCategory(u32 iCode) { 21817 int iRes = -1; 21818 int iHi; 21819 int iLo; 21820 int ret; 21821 u16 iKey; 21822 21823 if( iCode>=(1<<20) ){ 21824 return 0; 21825 } 21826 iLo = aFts5UnicodeBlock[(iCode>>16)]; 21827 iHi = aFts5UnicodeBlock[1+(iCode>>16)]; 21828 iKey = (iCode & 0xFFFF); 21829 while( iHi>iLo ){ 21830 int iTest = (iHi + iLo) / 2; 21831 assert( iTest>=iLo && iTest<iHi ); 21832 if( iKey>=aFts5UnicodeMap[iTest] ){ 21833 iRes = iTest; 21834 iLo = iTest+1; 21835 }else{ 21836 iHi = iTest; 21837 } 21838 } 21839 21840 if( iRes<0 ) return 0; 21841 if( iKey>=(aFts5UnicodeMap[iRes]+(aFts5UnicodeData[iRes]>>5)) ) return 0; 21842 ret = aFts5UnicodeData[iRes] & 0x1F; 21843 if( ret!=30 ) return ret; 21844 return ((iKey - aFts5UnicodeMap[iRes]) & 0x01) ? 
5 : 9; 21845 } 21846 21847 static void sqlite3Fts5UnicodeAscii(u8 *aArray, u8 *aAscii){ 21848 int i = 0; 21849 int iTbl = 0; 21850 while( i<128 ){ 21851 int bToken = aArray[ aFts5UnicodeData[iTbl] & 0x1F ]; 21852 int n = (aFts5UnicodeData[iTbl] >> 5) + i; 21853 for(; i<128 && i<n; i++){ 21854 aAscii[i] = (u8)bToken; 21855 } 21856 iTbl++; 21857 } 21858 aAscii[0] = 0; /* 0x00 is never a token character */ 21859 } 21860 21861 21862 #line 1 "fts5_varint.c" 21863 /* 21864 ** 2015 May 30 21865 ** 21866 ** The author disclaims copyright to this source code. In place of 21867 ** a legal notice, here is a blessing: 21868 ** 21869 ** May you do good and not evil. 21870 ** May you find forgiveness for yourself and forgive others. 21871 ** May you share freely, never taking more than you give. 21872 ** 21873 ****************************************************************************** 21874 ** 21875 ** Routines for varint serialization and deserialization. 21876 */ 21877 21878 21879 /* #include "fts5Int.h" */ 21880 21881 /* 21882 ** This is a copy of the sqlite3GetVarint32() routine from the SQLite core. 21883 ** Except, this version does handle the single byte case that the core 21884 ** version depends on being handled before its function is called. 21885 */ 21886 static int sqlite3Fts5GetVarint32(const unsigned char *p, u32 *v){ 21887 u32 a,b; 21888 21889 /* The 1-byte case. Overwhelmingly the most common. 
*/ 21890 a = *p; 21891 /* a: p0 (unmasked) */ 21892 if (!(a&0x80)) 21893 { 21894 /* Values between 0 and 127 */ 21895 *v = a; 21896 return 1; 21897 } 21898 21899 /* The 2-byte case */ 21900 p++; 21901 b = *p; 21902 /* b: p1 (unmasked) */ 21903 if (!(b&0x80)) 21904 { 21905 /* Values between 128 and 16383 */ 21906 a &= 0x7f; 21907 a = a<<7; 21908 *v = a | b; 21909 return 2; 21910 } 21911 21912 /* The 3-byte case */ 21913 p++; 21914 a = a<<14; 21915 a |= *p; 21916 /* a: p0<<14 | p2 (unmasked) */ 21917 if (!(a&0x80)) 21918 { 21919 /* Values between 16384 and 2097151 */ 21920 a &= (0x7f<<14)|(0x7f); 21921 b &= 0x7f; 21922 b = b<<7; 21923 *v = a | b; 21924 return 3; 21925 } 21926 21927 /* A 32-bit varint is used to store size information in btrees. 21928 ** Objects are rarely larger than 2MiB limit of a 3-byte varint. 21929 ** A 3-byte varint is sufficient, for example, to record the size 21930 ** of a 1048569-byte BLOB or string. 21931 ** 21932 ** We only unroll the first 1-, 2-, and 3- byte cases. The very 21933 ** rare larger cases can be handled by the slower 64-bit varint 21934 ** routine. 21935 */ 21936 { 21937 u64 v64; 21938 u8 n; 21939 p -= 2; 21940 n = sqlite3Fts5GetVarint(p, &v64); 21941 *v = ((u32)v64) & 0x7FFFFFFF; 21942 assert( n>3 && n<=9 ); 21943 return n; 21944 } 21945 } 21946 21947 21948 /* 21949 ** Bitmasks used by sqlite3GetVarint(). These precomputed constants 21950 ** are defined here rather than simply putting the constant expressions 21951 ** inline in order to work around bugs in the RVT compiler. 21952 ** 21953 ** SLOT_2_0 A mask for (0x7f<<14) | 0x7f 21954 ** 21955 ** SLOT_4_2_0 A mask for (0x7f<<28) | SLOT_2_0 21956 */ 21957 #define SLOT_2_0 0x001fc07f 21958 #define SLOT_4_2_0 0xf01fc07f 21959 21960 /* 21961 ** Read a 64-bit variable-length integer from memory starting at p[0]. 21962 ** Return the number of bytes read. The value is stored in *v. 
/*
** Read a 64-bit variable-length integer from memory starting at p[0].
** Return the number of bytes read (between 1 and 9).  The value is
** stored in *v.
**
** The decoder is fully unrolled. Bytes are accumulated alternately into
** the 32-bit registers "a" (even-numbered bytes p0, p2, p4...) and "b"
** (odd-numbered bytes p1, p3, p5...), and "s" collects the bits that end
** up in the upper 32 bits of the result. The short comments of the form
** "a: p0<<14 | p2 (unmasked)" record what each register holds at that
** point; "unmasked" means the 0x80 continuation bits have not yet been
** cleared. The statement order matters - masks are deliberately hoisted
** (the "CSE1"/"CSE2" notes) so that they are shared between branches.
*/
static u8 sqlite3Fts5GetVarint(const unsigned char *p, u64 *v){
  u32 a,b,s;

  a = *p;
  /* a: p0 (unmasked) */
  if (!(a&0x80))
  {
    /* 1-byte encoding: high bit clear, the byte is the value */
    *v = a;
    return 1;
  }

  p++;
  b = *p;
  /* b: p1 (unmasked) */
  if (!(b&0x80))
  {
    /* 2-byte encoding */
    a &= 0x7f;
    a = a<<7;
    a |= b;
    *v = a;
    return 2;
  }

  /* Verify that constants are precomputed correctly */
  assert( SLOT_2_0 == ((0x7f<<14) | (0x7f)) );
  assert( SLOT_4_2_0 == ((0xfU<<28) | (0x7f<<14) | (0x7f)) );

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<14 | p2 (unmasked) */
  if (!(a&0x80))
  {
    /* 3-byte encoding */
    a &= SLOT_2_0;
    b &= 0x7f;
    b = b<<7;
    a |= b;
    *v = a;
    return 3;
  }

  /* CSE1 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<14 | p3 (unmasked) */
  if (!(b&0x80))
  {
    /* 4-byte encoding */
    b &= SLOT_2_0;
    /* moved CSE1 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    *v = a;
    return 4;
  }

  /* a: p0<<14 | p2 (masked) */
  /* b: p1<<14 | p3 (unmasked) */
  /* 1:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  /* moved CSE1 up */
  /* a &= (0x7f<<14)|(0x7f); */
  b &= SLOT_2_0;
  s = a;
  /* s: p0<<14 | p2 (masked) */

  p++;
  a = a<<14;
  a |= *p;
  /* a: p0<<28 | p2<<14 | p4 (unmasked) */
  if (!(a&0x80))
  {
    /* 5-byte encoding: from here on the value no longer fits in 32 bits,
    ** so the high-order part is taken from s. */
    /* we can skip these cause they were (effectively) done above in calc'ing s */
    /* a &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    /* b &= (0x7f<<14)|(0x7f); */
    b = b<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 5;
  }

  /* 2:save off p0<<21 | p1<<14 | p2<<7 | p3 (masked) */
  s = s<<7;
  s |= b;
  /* s: p0<<21 | p1<<14 | p2<<7 | p3 (masked) */

  p++;
  b = b<<14;
  b |= *p;
  /* b: p1<<28 | p3<<14 | p5 (unmasked) */
  if (!(b&0x80))
  {
    /* 6-byte encoding */
    /* we can skip this cause it was (effectively) done above in calc'ing s */
    /* b &= (0x7f<<28)|(0x7f<<14)|(0x7f); */
    a &= SLOT_2_0;
    a = a<<7;
    a |= b;
    s = s>>18;
    *v = ((u64)s)<<32 | a;
    return 6;
  }

  p++;
  a = a<<14;
  a |= *p;
  /* a: p2<<28 | p4<<14 | p6 (unmasked) */
  if (!(a&0x80))
  {
    /* 7-byte encoding */
    a &= SLOT_4_2_0;
    b &= SLOT_2_0;
    b = b<<7;
    a |= b;
    s = s>>11;
    *v = ((u64)s)<<32 | a;
    return 7;
  }

  /* CSE2 from below */
  a &= SLOT_2_0;
  p++;
  b = b<<14;
  b |= *p;
  /* b: p3<<28 | p5<<14 | p7 (unmasked) */
  if (!(b&0x80))
  {
    /* 8-byte encoding */
    b &= SLOT_4_2_0;
    /* moved CSE2 up */
    /* a &= (0x7f<<14)|(0x7f); */
    a = a<<7;
    a |= b;
    s = s>>4;
    *v = ((u64)s)<<32 | a;
    return 8;
  }

  /* 9-byte encoding: the final byte contributes all 8 of its bits, which
  ** is why the shifts below are 15 and 8 rather than 14 and 7. */
  p++;
  a = a<<15;
  a |= *p;
  /* a: p4<<29 | p6<<15 | p8 (unmasked) */

  /* moved CSE2 up */
  /* a &= (0x7f<<29)|(0x7f<<15)|(0xff); */
  b &= SLOT_2_0;
  b = b<<8;
  a |= b;

  /* p now points at p8, so p[-4] is p4: recover the bits of p4 that were
  ** shifted out of the 32-bit register and fold them into s. */
  s = s<<4;
  b = p[-4];
  b &= 0x7f;
  b = b>>3;
  s |= b;

  *v = ((u64)s)<<32 | a;

  return 9;
}

/*
** The variable-length integer encoding is as follows:
**
** KEY:
**         A = 0xxxxxxx    7 bits of data and one flag bit
**         B = 1xxxxxxx    7 bits of data and one flag bit
**         C = xxxxxxxx    8 bits of data
**
**  7 bits - A
** 14 bits - BA
** 21 bits - BBA
** 28 bits - BBBA
** 35 bits - BBBBA
** 42 bits - BBBBBA
** 49 bits - BBBBBBA
** 56 bits - BBBBBBBA
** 64 bits - BBBBBBBBC
*/

/*
** FTS5_NOINLINE expands to SQLITE_NOINLINE when that macro is available,
** and to nothing otherwise.
*/
#ifdef SQLITE_NOINLINE
# define FTS5_NOINLINE SQLITE_NOINLINE
#else
# define FTS5_NOINLINE
#endif
starting at p[0]. 22150 ** The length of data write will be between 1 and 9 bytes. The number 22151 ** of bytes written is returned. 22152 ** 22153 ** A variable-length integer consists of the lower 7 bits of each byte 22154 ** for all bytes that have the 8th bit set and one byte with the 8th 22155 ** bit clear. Except, if we get to the 9th byte, it stores the full 22156 ** 8 bits and is the last byte. 22157 */ 22158 static int FTS5_NOINLINE fts5PutVarint64(unsigned char *p, u64 v){ 22159 int i, j, n; 22160 u8 buf[10]; 22161 if( v & (((u64)0xff000000)<<32) ){ 22162 p[8] = (u8)v; 22163 v >>= 8; 22164 for(i=7; i>=0; i--){ 22165 p[i] = (u8)((v & 0x7f) | 0x80); 22166 v >>= 7; 22167 } 22168 return 9; 22169 } 22170 n = 0; 22171 do{ 22172 buf[n++] = (u8)((v & 0x7f) | 0x80); 22173 v >>= 7; 22174 }while( v!=0 ); 22175 buf[0] &= 0x7f; 22176 assert( n<=9 ); 22177 for(i=0, j=n-1; j>=0; j--, i++){ 22178 p[i] = buf[j]; 22179 } 22180 return n; 22181 } 22182 22183 static int sqlite3Fts5PutVarint(unsigned char *p, u64 v){ 22184 if( v<=0x7f ){ 22185 p[0] = v&0x7f; 22186 return 1; 22187 } 22188 if( v<=0x3fff ){ 22189 p[0] = ((v>>7)&0x7f)|0x80; 22190 p[1] = v&0x7f; 22191 return 2; 22192 } 22193 return fts5PutVarint64(p,v); 22194 } 22195 22196 22197 static int sqlite3Fts5GetVarintLen(u32 iVal){ 22198 #if 0 22199 if( iVal<(1 << 7 ) ) return 1; 22200 #endif 22201 assert( iVal>=(1 << 7) ); 22202 if( iVal<(1 << 14) ) return 2; 22203 if( iVal<(1 << 21) ) return 3; 22204 if( iVal<(1 << 28) ) return 4; 22205 return 5; 22206 } 22207 22208 #line 1 "fts5_vocab.c" 22209 /* 22210 ** 2015 May 08 22211 ** 22212 ** The author disclaims copyright to this source code. In place of 22213 ** a legal notice, here is a blessing: 22214 ** 22215 ** May you do good and not evil. 22216 ** May you find forgiveness for yourself and forgive others. 22217 ** May you share freely, never taking more than you give. 
22218 ** 22219 ****************************************************************************** 22220 ** 22221 ** This is an SQLite virtual table module implementing direct access to an 22222 ** existing FTS5 index. The module may create several different types of 22223 ** tables: 22224 ** 22225 ** col: 22226 ** CREATE TABLE vocab(term, col, doc, cnt, PRIMARY KEY(term, col)); 22227 ** 22228 ** One row for each term/column combination. The value of $doc is set to 22229 ** the number of fts5 rows that contain at least one instance of term 22230 ** $term within column $col. Field $cnt is set to the total number of 22231 ** instances of term $term in column $col (in any row of the fts5 table). 22232 ** 22233 ** row: 22234 ** CREATE TABLE vocab(term, doc, cnt, PRIMARY KEY(term)); 22235 ** 22236 ** One row for each term in the database. The value of $doc is set to 22237 ** the number of fts5 rows that contain at least one instance of term 22238 ** $term. Field $cnt is set to the total number of instances of term 22239 ** $term in the database. 22240 ** 22241 ** instance: 22242 ** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>)); 22243 ** 22244 ** One row for each term instance in the database. 
/* #include "fts5Int.h" */


typedef struct Fts5VocabTable Fts5VocabTable;
typedef struct Fts5VocabCursor Fts5VocabCursor;

/*
** An fts5vocab virtual table object. The strings zFts5Tbl and zFts5Db are
** stored in the same allocation as the structure itself.
*/
struct Fts5VocabTable {
  sqlite3_vtab base;
  char *zFts5Tbl;                 /* Name of fts5 table */
  char *zFts5Db;                  /* Db containing fts5 table */
  sqlite3 *db;                    /* Database handle */
  Fts5Global *pGlobal;            /* FTS5 global object for this database */
  int eType;                      /* FTS5_VOCAB_COL, ROW or INSTANCE */
  unsigned bBusy;                 /* True while xOpen is stepping its lookup
                                  ** statement. Used to detect a recursive
                                  ** table definition. */
};

/*
** An fts5vocab cursor. The aCnt[] and aDoc[] arrays are stored in the same
** allocation as the structure itself.
*/
struct Fts5VocabCursor {
  sqlite3_vtab_cursor base;
  sqlite3_stmt *pStmt;            /* Statement holding lock on pIndex */
  Fts5Table *pFts5;               /* Associated FTS5 table */

  int bEof;                       /* True if this cursor is at EOF */
  Fts5IndexIter *pIter;           /* Term/rowid iterator object */
  void *pStruct;                  /* From sqlite3Fts5StructureRef() */

  int nLeTerm;                    /* Size of zLeTerm in bytes (-1 if unset) */
  char *zLeTerm;                  /* (term <= $zLeTerm) parameter, or NULL */

  /* These are used by 'col' tables only */
  int iCol;                       /* Column the current row describes */
  i64 *aCnt;                      /* Per-column instance counts for term */
  i64 *aDoc;                      /* Per-column document counts for term */

  /* Output values used by all tables. */
  i64 rowid;                      /* This table's current rowid value */
  Fts5Buffer term;                /* Current value of 'term' column */

  /* Output values used by 'instance' tables only */
  i64 iInstPos;                   /* Position last read from the poslist */
  int iInstOff;                   /* Current offset within the poslist */
};

/* Values for Fts5VocabTable.eType. Also used to index the schema array
** in fts5VocabInitVtab(). */
#define FTS5_VOCAB_COL      0
#define FTS5_VOCAB_ROW      1
#define FTS5_VOCAB_INSTANCE 2

/* Column lists declared for each of the three table types */
#define FTS5_VOCAB_COL_SCHEMA  "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA  "term, doc, cnt"
#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"
22300 */ 22301 #define FTS5_VOCAB_TERM_EQ 0x01 22302 #define FTS5_VOCAB_TERM_GE 0x02 22303 #define FTS5_VOCAB_TERM_LE 0x04 22304 22305 22306 /* 22307 ** Translate a string containing an fts5vocab table type to an 22308 ** FTS5_VOCAB_XXX constant. If successful, set *peType to the output 22309 ** value and return SQLITE_OK. Otherwise, set *pzErr to an error message 22310 ** and return SQLITE_ERROR. 22311 */ 22312 static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ 22313 int rc = SQLITE_OK; 22314 char *zCopy = sqlite3Fts5Strndup(&rc, zType, -1); 22315 if( rc==SQLITE_OK ){ 22316 sqlite3Fts5Dequote(zCopy); 22317 if( sqlite3_stricmp(zCopy, "col")==0 ){ 22318 *peType = FTS5_VOCAB_COL; 22319 }else 22320 22321 if( sqlite3_stricmp(zCopy, "row")==0 ){ 22322 *peType = FTS5_VOCAB_ROW; 22323 }else 22324 if( sqlite3_stricmp(zCopy, "instance")==0 ){ 22325 *peType = FTS5_VOCAB_INSTANCE; 22326 }else 22327 { 22328 *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy); 22329 rc = SQLITE_ERROR; 22330 } 22331 sqlite3_free(zCopy); 22332 } 22333 22334 return rc; 22335 } 22336 22337 22338 /* 22339 ** The xDisconnect() virtual table method. 22340 */ 22341 static int fts5VocabDisconnectMethod(sqlite3_vtab *pVtab){ 22342 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; 22343 sqlite3_free(pTab); 22344 return SQLITE_OK; 22345 } 22346 22347 /* 22348 ** The xDestroy() virtual table method. 22349 */ 22350 static int fts5VocabDestroyMethod(sqlite3_vtab *pVtab){ 22351 Fts5VocabTable *pTab = (Fts5VocabTable*)pVtab; 22352 sqlite3_free(pTab); 22353 return SQLITE_OK; 22354 } 22355 22356 /* 22357 ** This function is the implementation of both the xConnect and xCreate 22358 ** methods of the FTS3 virtual table. 
22359 ** 22360 ** The argv[] array contains the following: 22361 ** 22362 ** argv[0] -> module name ("fts5vocab") 22363 ** argv[1] -> database name 22364 ** argv[2] -> table name 22365 ** 22366 ** then: 22367 ** 22368 ** argv[3] -> name of fts5 table 22369 ** argv[4] -> type of fts5vocab table 22370 ** 22371 ** or, for tables in the TEMP schema only. 22372 ** 22373 ** argv[3] -> name of fts5 tables database 22374 ** argv[4] -> name of fts5 table 22375 ** argv[5] -> type of fts5vocab table 22376 */ 22377 static int fts5VocabInitVtab( 22378 sqlite3 *db, /* The SQLite database connection */ 22379 void *pAux, /* Pointer to Fts5Global object */ 22380 int argc, /* Number of elements in argv array */ 22381 const char * const *argv, /* xCreate/xConnect argument array */ 22382 sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */ 22383 char **pzErr /* Write any error message here */ 22384 ){ 22385 const char *azSchema[] = { 22386 "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")", 22387 "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")", 22388 "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")" 22389 }; 22390 22391 Fts5VocabTable *pRet = 0; 22392 int rc = SQLITE_OK; /* Return code */ 22393 int bDb; 22394 22395 bDb = (argc==6 && strlen(argv[1])==4 && memcmp("temp", argv[1], 4)==0); 22396 22397 if( argc!=5 && bDb==0 ){ 22398 *pzErr = sqlite3_mprintf("wrong number of vtable arguments"); 22399 rc = SQLITE_ERROR; 22400 }else{ 22401 int nByte; /* Bytes of space to allocate */ 22402 const char *zDb = bDb ? argv[3] : argv[1]; 22403 const char *zTab = bDb ? argv[4] : argv[3]; 22404 const char *zType = bDb ? 
argv[5] : argv[4]; 22405 int nDb = (int)strlen(zDb)+1; 22406 int nTab = (int)strlen(zTab)+1; 22407 int eType = 0; 22408 22409 rc = fts5VocabTableType(zType, pzErr, &eType); 22410 if( rc==SQLITE_OK ){ 22411 assert( eType>=0 && eType<ArraySize(azSchema) ); 22412 rc = sqlite3_declare_vtab(db, azSchema[eType]); 22413 } 22414 22415 nByte = sizeof(Fts5VocabTable) + nDb + nTab; 22416 pRet = sqlite3Fts5MallocZero(&rc, nByte); 22417 if( pRet ){ 22418 pRet->pGlobal = (Fts5Global*)pAux; 22419 pRet->eType = eType; 22420 pRet->db = db; 22421 pRet->zFts5Tbl = (char*)&pRet[1]; 22422 pRet->zFts5Db = &pRet->zFts5Tbl[nTab]; 22423 memcpy(pRet->zFts5Tbl, zTab, nTab); 22424 memcpy(pRet->zFts5Db, zDb, nDb); 22425 sqlite3Fts5Dequote(pRet->zFts5Tbl); 22426 sqlite3Fts5Dequote(pRet->zFts5Db); 22427 } 22428 } 22429 22430 *ppVTab = (sqlite3_vtab*)pRet; 22431 return rc; 22432 } 22433 22434 22435 /* 22436 ** The xConnect() and xCreate() methods for the virtual table. All the 22437 ** work is done in function fts5VocabInitVtab(). 
/*
** The xConnect() and xCreate() methods for the virtual table. All the
** work is done in function fts5VocabInitVtab(); both methods forward
** their arguments to it unchanged and return its result.
*/
static int fts5VocabConnectMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Module client data (Fts5Global object) */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}
static int fts5VocabCreateMethod(
  sqlite3 *db,                    /* Database connection */
  void *pAux,                     /* Module client data (Fts5Global object) */
  int argc,                       /* Number of elements in argv array */
  const char * const *argv,       /* xCreate/xConnect argument array */
  sqlite3_vtab **ppVtab,          /* OUT: New sqlite3_vtab object */
  char **pzErr                    /* OUT: sqlite3_malloc'd error message */
){
  return fts5VocabInitVtab(db, pAux, argc, argv, ppVtab, pzErr);
}
22471 */ 22472 static int fts5VocabBestIndexMethod( 22473 sqlite3_vtab *pUnused, 22474 sqlite3_index_info *pInfo 22475 ){ 22476 int i; 22477 int iTermEq = -1; 22478 int iTermGe = -1; 22479 int iTermLe = -1; 22480 int idxNum = 0; 22481 int nArg = 0; 22482 22483 UNUSED_PARAM(pUnused); 22484 22485 for(i=0; i<pInfo->nConstraint; i++){ 22486 struct sqlite3_index_constraint *p = &pInfo->aConstraint[i]; 22487 if( p->usable==0 ) continue; 22488 if( p->iColumn==0 ){ /* term column */ 22489 if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ) iTermEq = i; 22490 if( p->op==SQLITE_INDEX_CONSTRAINT_LE ) iTermLe = i; 22491 if( p->op==SQLITE_INDEX_CONSTRAINT_LT ) iTermLe = i; 22492 if( p->op==SQLITE_INDEX_CONSTRAINT_GE ) iTermGe = i; 22493 if( p->op==SQLITE_INDEX_CONSTRAINT_GT ) iTermGe = i; 22494 } 22495 } 22496 22497 if( iTermEq>=0 ){ 22498 idxNum |= FTS5_VOCAB_TERM_EQ; 22499 pInfo->aConstraintUsage[iTermEq].argvIndex = ++nArg; 22500 pInfo->estimatedCost = 100; 22501 }else{ 22502 pInfo->estimatedCost = 1000000; 22503 if( iTermGe>=0 ){ 22504 idxNum |= FTS5_VOCAB_TERM_GE; 22505 pInfo->aConstraintUsage[iTermGe].argvIndex = ++nArg; 22506 pInfo->estimatedCost = pInfo->estimatedCost / 2; 22507 } 22508 if( iTermLe>=0 ){ 22509 idxNum |= FTS5_VOCAB_TERM_LE; 22510 pInfo->aConstraintUsage[iTermLe].argvIndex = ++nArg; 22511 pInfo->estimatedCost = pInfo->estimatedCost / 2; 22512 } 22513 } 22514 22515 /* This virtual table always delivers results in ascending order of 22516 ** the "term" column (column 0). So if the user has requested this 22517 ** specifically - "ORDER BY term" or "ORDER BY term ASC" - set the 22518 ** sqlite3_index_info.orderByConsumed flag to tell the core the results 22519 ** are already in sorted order. */ 22520 if( pInfo->nOrderBy==1 22521 && pInfo->aOrderBy[0].iColumn==0 22522 && pInfo->aOrderBy[0].desc==0 22523 ){ 22524 pInfo->orderByConsumed = 1; 22525 } 22526 22527 pInfo->idxNum = idxNum; 22528 return SQLITE_OK; 22529 } 22530 22531 /* 22532 ** Implementation of xOpen method. 
22533 */ 22534 static int fts5VocabOpenMethod( 22535 sqlite3_vtab *pVTab, 22536 sqlite3_vtab_cursor **ppCsr 22537 ){ 22538 Fts5VocabTable *pTab = (Fts5VocabTable*)pVTab; 22539 Fts5Table *pFts5 = 0; 22540 Fts5VocabCursor *pCsr = 0; 22541 int rc = SQLITE_OK; 22542 sqlite3_stmt *pStmt = 0; 22543 char *zSql = 0; 22544 22545 if( pTab->bBusy ){ 22546 pVTab->zErrMsg = sqlite3_mprintf( 22547 "recursive definition for %s.%s", pTab->zFts5Db, pTab->zFts5Tbl 22548 ); 22549 return SQLITE_ERROR; 22550 } 22551 zSql = sqlite3Fts5Mprintf(&rc, 22552 "SELECT t.%Q FROM %Q.%Q AS t WHERE t.%Q MATCH '*id'", 22553 pTab->zFts5Tbl, pTab->zFts5Db, pTab->zFts5Tbl, pTab->zFts5Tbl 22554 ); 22555 if( zSql ){ 22556 rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0); 22557 } 22558 sqlite3_free(zSql); 22559 assert( rc==SQLITE_OK || pStmt==0 ); 22560 if( rc==SQLITE_ERROR ) rc = SQLITE_OK; 22561 22562 pTab->bBusy = 1; 22563 if( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){ 22564 i64 iId = sqlite3_column_int64(pStmt, 0); 22565 pFts5 = sqlite3Fts5TableFromCsrid(pTab->pGlobal, iId); 22566 } 22567 pTab->bBusy = 0; 22568 22569 if( rc==SQLITE_OK ){ 22570 if( pFts5==0 ){ 22571 rc = sqlite3_finalize(pStmt); 22572 pStmt = 0; 22573 if( rc==SQLITE_OK ){ 22574 pVTab->zErrMsg = sqlite3_mprintf( 22575 "no such fts5 table: %s.%s", pTab->zFts5Db, pTab->zFts5Tbl 22576 ); 22577 rc = SQLITE_ERROR; 22578 } 22579 }else{ 22580 rc = sqlite3Fts5FlushToDisk(pFts5); 22581 } 22582 } 22583 22584 if( rc==SQLITE_OK ){ 22585 i64 nByte = pFts5->pConfig->nCol * sizeof(i64)*2 + sizeof(Fts5VocabCursor); 22586 pCsr = (Fts5VocabCursor*)sqlite3Fts5MallocZero(&rc, nByte); 22587 } 22588 22589 if( pCsr ){ 22590 pCsr->pFts5 = pFts5; 22591 pCsr->pStmt = pStmt; 22592 pCsr->aCnt = (i64*)&pCsr[1]; 22593 pCsr->aDoc = &pCsr->aCnt[pFts5->pConfig->nCol]; 22594 }else{ 22595 sqlite3_finalize(pStmt); 22596 } 22597 22598 *ppCsr = (sqlite3_vtab_cursor*)pCsr; 22599 return rc; 22600 } 22601 22602 static void fts5VocabResetCursor(Fts5VocabCursor *pCsr){ 
22603 pCsr->rowid = 0; 22604 sqlite3Fts5IterClose(pCsr->pIter); 22605 sqlite3Fts5StructureRelease(pCsr->pStruct); 22606 pCsr->pStruct = 0; 22607 pCsr->pIter = 0; 22608 sqlite3_free(pCsr->zLeTerm); 22609 pCsr->nLeTerm = -1; 22610 pCsr->zLeTerm = 0; 22611 pCsr->bEof = 0; 22612 } 22613 22614 /* 22615 ** Close the cursor. For additional information see the documentation 22616 ** on the xClose method of the virtual table interface. 22617 */ 22618 static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ 22619 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; 22620 fts5VocabResetCursor(pCsr); 22621 sqlite3Fts5BufferFree(&pCsr->term); 22622 sqlite3_finalize(pCsr->pStmt); 22623 sqlite3_free(pCsr); 22624 return SQLITE_OK; 22625 } 22626 22627 static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){ 22628 int rc = SQLITE_OK; 22629 22630 if( sqlite3Fts5IterEof(pCsr->pIter) ){ 22631 pCsr->bEof = 1; 22632 }else{ 22633 const char *zTerm; 22634 int nTerm; 22635 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); 22636 if( pCsr->nLeTerm>=0 ){ 22637 int nCmp = MIN(nTerm, pCsr->nLeTerm); 22638 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); 22639 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ 22640 pCsr->bEof = 1; 22641 } 22642 } 22643 22644 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); 22645 } 22646 return rc; 22647 } 22648 22649 static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){ 22650 int eDetail = pCsr->pFts5->pConfig->eDetail; 22651 int rc = SQLITE_OK; 22652 Fts5IndexIter *pIter = pCsr->pIter; 22653 i64 *pp = &pCsr->iInstPos; 22654 int *po = &pCsr->iInstOff; 22655 22656 assert( sqlite3Fts5IterEof(pIter)==0 ); 22657 assert( pCsr->bEof==0 ); 22658 while( eDetail==FTS5_DETAIL_NONE 22659 || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp) 22660 ){ 22661 pCsr->iInstPos = 0; 22662 pCsr->iInstOff = 0; 22663 22664 rc = sqlite3Fts5IterNextScan(pCsr->pIter); 22665 if( rc==SQLITE_OK ){ 22666 rc = fts5VocabInstanceNewTerm(pCsr); 22667 if( 
pCsr->bEof || eDetail==FTS5_DETAIL_NONE ) break; 22668 } 22669 if( rc ){ 22670 pCsr->bEof = 1; 22671 break; 22672 } 22673 } 22674 22675 return rc; 22676 } 22677 22678 /* 22679 ** Advance the cursor to the next row in the table. 22680 */ 22681 static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ 22682 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; 22683 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; 22684 int nCol = pCsr->pFts5->pConfig->nCol; 22685 int rc; 22686 22687 rc = sqlite3Fts5StructureTest(pCsr->pFts5->pIndex, pCsr->pStruct); 22688 if( rc!=SQLITE_OK ) return rc; 22689 pCsr->rowid++; 22690 22691 if( pTab->eType==FTS5_VOCAB_INSTANCE ){ 22692 return fts5VocabInstanceNext(pCsr); 22693 } 22694 22695 if( pTab->eType==FTS5_VOCAB_COL ){ 22696 for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){ 22697 if( pCsr->aDoc[pCsr->iCol] ) break; 22698 } 22699 } 22700 22701 if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){ 22702 if( sqlite3Fts5IterEof(pCsr->pIter) ){ 22703 pCsr->bEof = 1; 22704 }else{ 22705 const char *zTerm; 22706 int nTerm; 22707 22708 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); 22709 assert( nTerm>=0 ); 22710 if( pCsr->nLeTerm>=0 ){ 22711 int nCmp = MIN(nTerm, pCsr->nLeTerm); 22712 int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); 22713 if( bCmp<0 || (bCmp==0 && pCsr->nLeTerm<nTerm) ){ 22714 pCsr->bEof = 1; 22715 return SQLITE_OK; 22716 } 22717 } 22718 22719 sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); 22720 memset(pCsr->aCnt, 0, nCol * sizeof(i64)); 22721 memset(pCsr->aDoc, 0, nCol * sizeof(i64)); 22722 pCsr->iCol = 0; 22723 22724 assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); 22725 while( rc==SQLITE_OK ){ 22726 int eDetail = pCsr->pFts5->pConfig->eDetail; 22727 const u8 *pPos; int nPos; /* Position list */ 22728 i64 iPos = 0; /* 64-bit position read from poslist */ 22729 int iOff = 0; /* Current offset within position list */ 22730 22731 pPos = pCsr->pIter->pData; 22732 nPos = 
pCsr->pIter->nData; 22733 22734 switch( pTab->eType ){ 22735 case FTS5_VOCAB_ROW: 22736 if( eDetail==FTS5_DETAIL_FULL ){ 22737 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ 22738 pCsr->aCnt[0]++; 22739 } 22740 } 22741 pCsr->aDoc[0]++; 22742 break; 22743 22744 case FTS5_VOCAB_COL: 22745 if( eDetail==FTS5_DETAIL_FULL ){ 22746 int iCol = -1; 22747 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ 22748 int ii = FTS5_POS2COLUMN(iPos); 22749 if( iCol!=ii ){ 22750 if( ii>=nCol ){ 22751 rc = FTS5_CORRUPT; 22752 break; 22753 } 22754 pCsr->aDoc[ii]++; 22755 iCol = ii; 22756 } 22757 pCsr->aCnt[ii]++; 22758 } 22759 }else if( eDetail==FTS5_DETAIL_COLUMNS ){ 22760 while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ 22761 assert_nc( iPos>=0 && iPos<nCol ); 22762 if( iPos>=nCol ){ 22763 rc = FTS5_CORRUPT; 22764 break; 22765 } 22766 pCsr->aDoc[iPos]++; 22767 } 22768 }else{ 22769 assert( eDetail==FTS5_DETAIL_NONE ); 22770 pCsr->aDoc[0]++; 22771 } 22772 break; 22773 22774 default: 22775 assert( pTab->eType==FTS5_VOCAB_INSTANCE ); 22776 break; 22777 } 22778 22779 if( rc==SQLITE_OK ){ 22780 rc = sqlite3Fts5IterNextScan(pCsr->pIter); 22781 } 22782 if( pTab->eType==FTS5_VOCAB_INSTANCE ) break; 22783 22784 if( rc==SQLITE_OK ){ 22785 zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); 22786 if( nTerm!=pCsr->term.n 22787 || (nTerm>0 && memcmp(zTerm, pCsr->term.p, nTerm)) 22788 ){ 22789 break; 22790 } 22791 if( sqlite3Fts5IterEof(pCsr->pIter) ) break; 22792 } 22793 } 22794 } 22795 } 22796 22797 if( rc==SQLITE_OK && pCsr->bEof==0 && pTab->eType==FTS5_VOCAB_COL ){ 22798 for(/* noop */; pCsr->iCol<nCol && pCsr->aDoc[pCsr->iCol]==0; pCsr->iCol++); 22799 if( pCsr->iCol==nCol ){ 22800 rc = FTS5_CORRUPT; 22801 } 22802 } 22803 return rc; 22804 } 22805 22806 /* 22807 ** This is the xFilter implementation for the virtual table. 
22808 */ 22809 static int fts5VocabFilterMethod( 22810 sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ 22811 int idxNum, /* Strategy index */ 22812 const char *zUnused, /* Unused */ 22813 int nUnused, /* Number of elements in apVal */ 22814 sqlite3_value **apVal /* Arguments for the indexing scheme */ 22815 ){ 22816 Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; 22817 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; 22818 int eType = pTab->eType; 22819 int rc = SQLITE_OK; 22820 22821 int iVal = 0; 22822 int f = FTS5INDEX_QUERY_SCAN; 22823 const char *zTerm = 0; 22824 int nTerm = 0; 22825 22826 sqlite3_value *pEq = 0; 22827 sqlite3_value *pGe = 0; 22828 sqlite3_value *pLe = 0; 22829 22830 UNUSED_PARAM2(zUnused, nUnused); 22831 22832 fts5VocabResetCursor(pCsr); 22833 if( idxNum & FTS5_VOCAB_TERM_EQ ) pEq = apVal[iVal++]; 22834 if( idxNum & FTS5_VOCAB_TERM_GE ) pGe = apVal[iVal++]; 22835 if( idxNum & FTS5_VOCAB_TERM_LE ) pLe = apVal[iVal++]; 22836 22837 if( pEq ){ 22838 zTerm = (const char *)sqlite3_value_text(pEq); 22839 nTerm = sqlite3_value_bytes(pEq); 22840 f = 0; 22841 }else{ 22842 if( pGe ){ 22843 zTerm = (const char *)sqlite3_value_text(pGe); 22844 nTerm = sqlite3_value_bytes(pGe); 22845 } 22846 if( pLe ){ 22847 const char *zCopy = (const char *)sqlite3_value_text(pLe); 22848 if( zCopy==0 ) zCopy = ""; 22849 pCsr->nLeTerm = sqlite3_value_bytes(pLe); 22850 pCsr->zLeTerm = sqlite3_malloc(pCsr->nLeTerm+1); 22851 if( pCsr->zLeTerm==0 ){ 22852 rc = SQLITE_NOMEM; 22853 }else{ 22854 memcpy(pCsr->zLeTerm, zCopy, pCsr->nLeTerm+1); 22855 } 22856 } 22857 } 22858 22859 if( rc==SQLITE_OK ){ 22860 Fts5Index *pIndex = pCsr->pFts5->pIndex; 22861 rc = sqlite3Fts5IndexQuery(pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); 22862 if( rc==SQLITE_OK ){ 22863 pCsr->pStruct = sqlite3Fts5StructureRef(pIndex); 22864 } 22865 } 22866 if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){ 22867 rc = fts5VocabInstanceNewTerm(pCsr); 22868 } 22869 if( rc==SQLITE_OK && 
!pCsr->bEof 22870 && (eType!=FTS5_VOCAB_INSTANCE 22871 || pCsr->pFts5->pConfig->eDetail!=FTS5_DETAIL_NONE) 22872 ){ 22873 rc = fts5VocabNextMethod(pCursor); 22874 } 22875 22876 return rc; 22877 } 22878 22879 /* 22880 ** This is the xEof method of the virtual table. SQLite calls this 22881 ** routine to find out if it has reached the end of a result set. 22882 */ 22883 static int fts5VocabEofMethod(sqlite3_vtab_cursor *pCursor){ 22884 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; 22885 return pCsr->bEof; 22886 } 22887 22888 static int fts5VocabColumnMethod( 22889 sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ 22890 sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ 22891 int iCol /* Index of column to read value from */ 22892 ){ 22893 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; 22894 int eDetail = pCsr->pFts5->pConfig->eDetail; 22895 int eType = ((Fts5VocabTable*)(pCursor->pVtab))->eType; 22896 i64 iVal = 0; 22897 22898 if( iCol==0 ){ 22899 sqlite3_result_text( 22900 pCtx, (const char*)pCsr->term.p, pCsr->term.n, SQLITE_TRANSIENT 22901 ); 22902 }else if( eType==FTS5_VOCAB_COL ){ 22903 assert( iCol==1 || iCol==2 || iCol==3 ); 22904 if( iCol==1 ){ 22905 if( eDetail!=FTS5_DETAIL_NONE ){ 22906 const char *z = pCsr->pFts5->pConfig->azCol[pCsr->iCol]; 22907 sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); 22908 } 22909 }else if( iCol==2 ){ 22910 iVal = pCsr->aDoc[pCsr->iCol]; 22911 }else{ 22912 iVal = pCsr->aCnt[pCsr->iCol]; 22913 } 22914 }else if( eType==FTS5_VOCAB_ROW ){ 22915 assert( iCol==1 || iCol==2 ); 22916 if( iCol==1 ){ 22917 iVal = pCsr->aDoc[0]; 22918 }else{ 22919 iVal = pCsr->aCnt[0]; 22920 } 22921 }else{ 22922 assert( eType==FTS5_VOCAB_INSTANCE ); 22923 switch( iCol ){ 22924 case 1: 22925 sqlite3_result_int64(pCtx, pCsr->pIter->iRowid); 22926 break; 22927 case 2: { 22928 int ii = -1; 22929 if( eDetail==FTS5_DETAIL_FULL ){ 22930 ii = FTS5_POS2COLUMN(pCsr->iInstPos); 22931 }else if( eDetail==FTS5_DETAIL_COLUMNS ){ 
22932 ii = (int)pCsr->iInstPos; 22933 } 22934 if( ii>=0 && ii<pCsr->pFts5->pConfig->nCol ){ 22935 const char *z = pCsr->pFts5->pConfig->azCol[ii]; 22936 sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); 22937 } 22938 break; 22939 } 22940 default: { 22941 assert( iCol==3 ); 22942 if( eDetail==FTS5_DETAIL_FULL ){ 22943 int ii = FTS5_POS2OFFSET(pCsr->iInstPos); 22944 sqlite3_result_int(pCtx, ii); 22945 } 22946 break; 22947 } 22948 } 22949 } 22950 22951 if( iVal>0 ) sqlite3_result_int64(pCtx, iVal); 22952 return SQLITE_OK; 22953 } 22954 22955 /* 22956 ** This is the xRowid method. The SQLite core calls this routine to 22957 ** retrieve the rowid for the current row of the result set. The 22958 ** rowid should be written to *pRowid. 22959 */ 22960 static int fts5VocabRowidMethod( 22961 sqlite3_vtab_cursor *pCursor, 22962 sqlite_int64 *pRowid 22963 ){ 22964 Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; 22965 *pRowid = pCsr->rowid; 22966 return SQLITE_OK; 22967 } 22968 22969 static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ 22970 static const sqlite3_module fts5Vocab = { 22971 /* iVersion */ 2, 22972 /* xCreate */ fts5VocabCreateMethod, 22973 /* xConnect */ fts5VocabConnectMethod, 22974 /* xBestIndex */ fts5VocabBestIndexMethod, 22975 /* xDisconnect */ fts5VocabDisconnectMethod, 22976 /* xDestroy */ fts5VocabDestroyMethod, 22977 /* xOpen */ fts5VocabOpenMethod, 22978 /* xClose */ fts5VocabCloseMethod, 22979 /* xFilter */ fts5VocabFilterMethod, 22980 /* xNext */ fts5VocabNextMethod, 22981 /* xEof */ fts5VocabEofMethod, 22982 /* xColumn */ fts5VocabColumnMethod, 22983 /* xRowid */ fts5VocabRowidMethod, 22984 /* xUpdate */ 0, 22985 /* xBegin */ 0, 22986 /* xSync */ 0, 22987 /* xCommit */ 0, 22988 /* xRollback */ 0, 22989 /* xFindFunction */ 0, 22990 /* xRename */ 0, 22991 /* xSavepoint */ 0, 22992 /* xRelease */ 0, 22993 /* xRollbackTo */ 0, 22994 /* xShadowName */ 0 22995 }; 22996 void *p = (void*)pGlobal; 22997 22998 return sqlite3_create_module_v2(db, 
"fts5vocab", &fts5Vocab, p, 0); 22999 } 23000 23001 23002 23003 #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ 23004