1 /* 2 ** 2009 Nov 12 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 */ 14 #ifndef _FTSINT_H 15 #define _FTSINT_H 16 17 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 18 # define NDEBUG 1 19 #endif 20 21 /* FTS3/FTS4 require virtual tables */ 22 #ifdef SQLITE_OMIT_VIRTUALTABLE 23 # undef SQLITE_ENABLE_FTS3 24 # undef SQLITE_ENABLE_FTS4 25 #endif 26 27 /* 28 ** FTS4 is really an extension for FTS3. It is enabled using the 29 ** SQLITE_ENABLE_FTS3 macro. But to avoid confusion we also all 30 ** the SQLITE_ENABLE_FTS4 macro to serve as an alisse for SQLITE_ENABLE_FTS3. 31 */ 32 #if defined(SQLITE_ENABLE_FTS4) && !defined(SQLITE_ENABLE_FTS3) 33 # define SQLITE_ENABLE_FTS3 34 #endif 35 36 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) 37 38 /* If not building as part of the core, include sqlite3ext.h. */ 39 #ifndef SQLITE_CORE 40 # include "sqlite3ext.h" 41 SQLITE_EXTENSION_INIT3 42 #endif 43 44 #include "sqlite3.h" 45 #include "fts3_tokenizer.h" 46 #include "fts3_hash.h" 47 48 /* 49 ** This constant determines the maximum depth of an FTS expression tree 50 ** that the library will create and use. FTS uses recursion to perform 51 ** various operations on the query tree, so the disadvantage of a large 52 ** limit is that it may allow very large queries to use large amounts 53 ** of stack space (perhaps causing a stack overflow). 54 */ 55 #ifndef SQLITE_FTS3_MAX_EXPR_DEPTH 56 # define SQLITE_FTS3_MAX_EXPR_DEPTH 12 57 #endif 58 59 60 /* 61 ** This constant controls how often segments are merged. Once there are 62 ** FTS3_MERGE_COUNT segments of level N, they are merged into a single 63 ** segment of level N+1. 64 */ 65 #define FTS3_MERGE_COUNT 16 66 67 /* 68 ** This is the maximum amount of data (in bytes) to store in the 69 ** Fts3Table.pendingTerms hash table. Normally, the hash table is 70 ** populated as documents are inserted/updated/deleted in a transaction 71 ** and used to create a new segment when the transaction is committed. 72 ** However if this limit is reached midway through a transaction, a new 73 ** segment is created and the hash table cleared immediately. 74 */ 75 #define FTS3_MAX_PENDING_DATA (1*1024*1024) 76 77 /* 78 ** Macro to return the number of elements in an array. SQLite has a 79 ** similar macro called ArraySize(). Use a different name to avoid 80 ** a collision when building an amalgamation with built-in FTS3. 81 */ 82 #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0]))) 83 84 85 #ifndef MIN 86 # define MIN(x,y) ((x)<(y)?(x):(y)) 87 #endif 88 #ifndef MAX 89 # define MAX(x,y) ((x)>(y)?(x):(y)) 90 #endif 91 92 /* 93 ** Maximum length of a varint encoded integer. The varint format is different 94 ** from that used by SQLite, so the maximum length is 10, not 9. 95 */ 96 #define FTS3_VARINT_MAX 10 97 98 #define FTS3_BUFFER_PADDING 8 99 100 /* 101 ** FTS4 virtual tables may maintain multiple indexes - one index of all terms 102 ** in the document set and zero or more prefix indexes. All indexes are stored 103 ** as one or more b+-trees in the %_segments and %_segdir tables. 104 ** 105 ** It is possible to determine which index a b+-tree belongs to based on the 106 ** value stored in the "%_segdir.level" column. Given this value L, the index 107 ** that the b+-tree belongs to is (L<<10). In other words, all b+-trees with 108 ** level values between 0 and 1023 (inclusive) belong to index 0, all levels 109 ** between 1024 and 2047 to index 1, and so on. 110 ** 111 ** It is considered impossible for an index to use more than 1024 levels. In 112 ** theory though this may happen, but only after at least 113 ** (FTS3_MERGE_COUNT^1024) separate flushes of the pending-terms tables. 114 */ 115 #define FTS3_SEGDIR_MAXLEVEL 1024 116 #define FTS3_SEGDIR_MAXLEVEL_STR "1024" 117 118 /* 119 ** The testcase() macro is only used by the amalgamation. If undefined, 120 ** make it a no-op. 121 */ 122 #ifndef testcase 123 # define testcase(X) 124 #endif 125 126 /* 127 ** Terminator values for position-lists and column-lists. 128 */ 129 #define POS_COLUMN (1) /* Column-list terminator */ 130 #define POS_END (0) /* Position-list terminator */ 131 132 /* 133 ** The assert_fts3_nc() macro is similar to the assert() macro, except that it 134 ** is used for assert() conditions that are true only if it can be 135 ** guranteed that the database is not corrupt. 136 */ 137 #ifdef SQLITE_DEBUG 138 extern int sqlite3_fts3_may_be_corrupt; 139 # define assert_fts3_nc(x) assert(sqlite3_fts3_may_be_corrupt || (x)) 140 #else 141 # define assert_fts3_nc(x) assert(x) 142 #endif 143 144 /* 145 ** This section provides definitions to allow the 146 ** FTS3 extension to be compiled outside of the 147 ** amalgamation. 148 */ 149 #ifndef SQLITE_AMALGAMATION 150 /* 151 ** Macros indicating that conditional expressions are always true or 152 ** false. 153 */ 154 #if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_MUTATION_TEST) 155 # define SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS 1 156 #endif 157 #if defined(SQLITE_OMIT_AUXILIARY_SAFETY_CHECKS) 158 # define ALWAYS(X) (1) 159 # define NEVER(X) (0) 160 #elif !defined(NDEBUG) 161 # define ALWAYS(X) ((X)?1:(assert(0),0)) 162 # define NEVER(X) ((X)?(assert(0),1):0) 163 #else 164 # define ALWAYS(X) (X) 165 # define NEVER(X) (X) 166 #endif 167 168 /* 169 ** Internal types used by SQLite. 170 */ 171 typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */ 172 typedef short int i16; /* 2-byte (or larger) signed integer */ 173 typedef unsigned int u32; /* 4-byte unsigned integer */ 174 typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */ 175 typedef sqlite3_int64 i64; /* 8-byte signed integer */ 176 177 /* 178 ** Macro used to suppress compiler warnings for unused parameters. 179 */ 180 #define UNUSED_PARAMETER(x) (void)(x) 181 182 /* 183 ** Activate assert() only if SQLITE_TEST is enabled. 184 */ 185 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) 186 # define NDEBUG 1 187 #endif 188 189 /* 190 ** The TESTONLY macro is used to enclose variable declarations or 191 ** other bits of code that are needed to support the arguments 192 ** within testcase() and assert() macros. 193 */ 194 #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) 195 # define TESTONLY(X) X 196 #else 197 # define TESTONLY(X) 198 #endif 199 200 #define LARGEST_INT64 (0xffffffff|(((i64)0x7fffffff)<<32)) 201 #define SMALLEST_INT64 (((i64)-1) - LARGEST_INT64) 202 203 #define deliberate_fall_through 204 205 #endif /* SQLITE_AMALGAMATION */ 206 207 #ifdef SQLITE_DEBUG 208 int sqlite3Fts3Corrupt(void); 209 # define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt() 210 #else 211 # define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB 212 #endif 213 214 typedef struct Fts3Table Fts3Table; 215 typedef struct Fts3Cursor Fts3Cursor; 216 typedef struct Fts3Expr Fts3Expr; 217 typedef struct Fts3Phrase Fts3Phrase; 218 typedef struct Fts3PhraseToken Fts3PhraseToken; 219 220 typedef struct Fts3Doclist Fts3Doclist; 221 typedef struct Fts3SegFilter Fts3SegFilter; 222 typedef struct Fts3DeferredToken Fts3DeferredToken; 223 typedef struct Fts3SegReader Fts3SegReader; 224 typedef struct Fts3MultiSegReader Fts3MultiSegReader; 225 226 typedef struct MatchinfoBuffer MatchinfoBuffer; 227 228 /* 229 ** A connection to a fulltext index is an instance of the following 230 ** structure. The xCreate and xConnect methods create an instance 231 ** of this structure and xDestroy and xDisconnect free that instance. 232 ** All other methods receive a pointer to the structure as one of their 233 ** arguments. 234 */ 235 struct Fts3Table { 236 sqlite3_vtab base; /* Base class used by SQLite core */ 237 sqlite3 *db; /* The database connection */ 238 const char *zDb; /* logical database name */ 239 const char *zName; /* virtual table name */ 240 int nColumn; /* number of named columns in virtual table */ 241 char **azColumn; /* column names. malloced */ 242 u8 *abNotindexed; /* True for 'notindexed' columns */ 243 sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ 244 char *zContentTbl; /* content=xxx option, or NULL */ 245 char *zLanguageid; /* languageid=xxx option, or NULL */ 246 int nAutoincrmerge; /* Value configured by 'automerge' */ 247 u32 nLeafAdd; /* Number of leaf blocks added this trans */ 248 int bLock; /* Used to prevent recursive content= tbls */ 249 250 /* Precompiled statements used by the implementation. Each of these 251 ** statements is run and reset within a single virtual table API call. 252 */ 253 sqlite3_stmt *aStmt[40]; 254 sqlite3_stmt *pSeekStmt; /* Cache for fts3CursorSeekStmt() */ 255 256 char *zReadExprlist; 257 char *zWriteExprlist; 258 259 int nNodeSize; /* Soft limit for node size */ 260 u8 bFts4; /* True for FTS4, false for FTS3 */ 261 u8 bHasStat; /* True if %_stat table exists (2==unknown) */ 262 u8 bHasDocsize; /* True if %_docsize table exists */ 263 u8 bDescIdx; /* True if doclists are in reverse order */ 264 u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */ 265 int nPgsz; /* Page size for host database */ 266 char *zSegmentsTbl; /* Name of %_segments table */ 267 sqlite3_blob *pSegments; /* Blob handle open on %_segments table */ 268 269 /* 270 ** The following array of hash tables is used to buffer pending index 271 ** updates during transactions. All pending updates buffered at any one 272 ** time must share a common language-id (see the FTS4 langid= feature). 273 ** The current language id is stored in variable iPrevLangid. 274 ** 275 ** A single FTS4 table may have multiple full-text indexes. For each index 276 ** there is an entry in the aIndex[] array. Index 0 is an index of all the 277 ** terms that appear in the document set. Each subsequent index in aIndex[] 278 ** is an index of prefixes of a specific length. 279 ** 280 ** Variable nPendingData contains an estimate the memory consumed by the 281 ** pending data structures, including hash table overhead, but not including 282 ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash 283 ** tables are flushed to disk. Variable iPrevDocid is the docid of the most 284 ** recently inserted record. 285 */ 286 int nIndex; /* Size of aIndex[] */ 287 struct Fts3Index { 288 int nPrefix; /* Prefix length (0 for main terms index) */ 289 Fts3Hash hPending; /* Pending terms table for this index */ 290 } *aIndex; 291 int nMaxPendingData; /* Max pending data before flush to disk */ 292 int nPendingData; /* Current bytes of pending data */ 293 sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */ 294 int iPrevLangid; /* Langid of recently inserted document */ 295 int bPrevDelete; /* True if last operation was a delete */ 296 297 #if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST) 298 /* State variables used for validating that the transaction control 299 ** methods of the virtual table are called at appropriate times. These 300 ** values do not contribute to FTS functionality; they are used for 301 ** verifying the operation of the SQLite core. 302 */ 303 int inTransaction; /* True after xBegin but before xCommit/xRollback */ 304 int mxSavepoint; /* Largest valid xSavepoint integer */ 305 #endif 306 307 #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) 308 /* True to disable the incremental doclist optimization. This is controled 309 ** by special insert command 'test-no-incr-doclist'. */ 310 int bNoIncrDoclist; 311 312 /* Number of segments in a level */ 313 int nMergeCount; 314 #endif 315 }; 316 317 /* Macro to find the number of segments to merge */ 318 #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST) 319 # define MergeCount(P) ((P)->nMergeCount) 320 #else 321 # define MergeCount(P) FTS3_MERGE_COUNT 322 #endif 323 324 /* 325 ** When the core wants to read from the virtual table, it creates a 326 ** virtual table cursor (an instance of the following structure) using 327 ** the xOpen method. Cursors are destroyed using the xClose method. 328 */ 329 struct Fts3Cursor { 330 sqlite3_vtab_cursor base; /* Base class used by SQLite core */ 331 i16 eSearch; /* Search strategy (see below) */ 332 u8 isEof; /* True if at End Of Results */ 333 u8 isRequireSeek; /* True if must seek pStmt to %_content row */ 334 u8 bSeekStmt; /* True if pStmt is a seek */ 335 sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ 336 Fts3Expr *pExpr; /* Parsed MATCH query string */ 337 int iLangid; /* Language being queried for */ 338 int nPhrase; /* Number of matchable phrases in query */ 339 Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */ 340 sqlite3_int64 iPrevId; /* Previous id read from aDoclist */ 341 char *pNextId; /* Pointer into the body of aDoclist */ 342 char *aDoclist; /* List of docids for full-text queries */ 343 int nDoclist; /* Size of buffer at aDoclist */ 344 u8 bDesc; /* True to sort in descending order */ 345 int eEvalmode; /* An FTS3_EVAL_XX constant */ 346 int nRowAvg; /* Average size of database rows, in pages */ 347 sqlite3_int64 nDoc; /* Documents in table */ 348 i64 iMinDocid; /* Minimum docid to return */ 349 i64 iMaxDocid; /* Maximum docid to return */ 350 int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ 351 MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */ 352 }; 353 354 #define FTS3_EVAL_FILTER 0 355 #define FTS3_EVAL_NEXT 1 356 #define FTS3_EVAL_MATCHINFO 2 357 358 /* 359 ** The Fts3Cursor.eSearch member is always set to one of the following. 360 ** Actualy, Fts3Cursor.eSearch can be greater than or equal to 361 ** FTS3_FULLTEXT_SEARCH. If so, then Fts3Cursor.eSearch - 2 is the index 362 ** of the column to be searched. For example, in 363 ** 364 ** CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d); 365 ** SELECT docid FROM ex1 WHERE b MATCH 'one two three'; 366 ** 367 ** Because the LHS of the MATCH operator is 2nd column "b", 368 ** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1. (+0 for a, 369 ** +1 for b, +2 for c, +3 for d.) If the LHS of MATCH were "ex1" 370 ** indicating that all columns should be searched, 371 ** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4. 372 */ 373 #define FTS3_FULLSCAN_SEARCH 0 /* Linear scan of %_content table */ 374 #define FTS3_DOCID_SEARCH 1 /* Lookup by rowid on %_content table */ 375 #define FTS3_FULLTEXT_SEARCH 2 /* Full-text index search */ 376 377 /* 378 ** The lower 16-bits of the sqlite3_index_info.idxNum value set by 379 ** the xBestIndex() method contains the Fts3Cursor.eSearch value described 380 ** above. The upper 16-bits contain a combination of the following 381 ** bits, used to describe extra constraints on full-text searches. 382 */ 383 #define FTS3_HAVE_LANGID 0x00010000 /* languageid=? */ 384 #define FTS3_HAVE_DOCID_GE 0x00020000 /* docid>=? */ 385 #define FTS3_HAVE_DOCID_LE 0x00040000 /* docid<=? */ 386 387 struct Fts3Doclist { 388 char *aAll; /* Array containing doclist (or NULL) */ 389 int nAll; /* Size of a[] in bytes */ 390 char *pNextDocid; /* Pointer to next docid */ 391 392 sqlite3_int64 iDocid; /* Current docid (if pList!=0) */ 393 int bFreeList; /* True if pList should be sqlite3_free()d */ 394 char *pList; /* Pointer to position list following iDocid */ 395 int nList; /* Length of position list */ 396 }; 397 398 /* 399 ** A "phrase" is a sequence of one or more tokens that must match in 400 ** sequence. A single token is the base case and the most common case. 401 ** For a sequence of tokens contained in double-quotes (i.e. "one two three") 402 ** nToken will be the number of tokens in the string. 403 */ 404 struct Fts3PhraseToken { 405 char *z; /* Text of the token */ 406 int n; /* Number of bytes in buffer z */ 407 int isPrefix; /* True if token ends with a "*" character */ 408 int bFirst; /* True if token must appear at position 0 */ 409 410 /* Variables above this point are populated when the expression is 411 ** parsed (by code in fts3_expr.c). Below this point the variables are 412 ** used when evaluating the expression. */ 413 Fts3DeferredToken *pDeferred; /* Deferred token object for this token */ 414 Fts3MultiSegReader *pSegcsr; /* Segment-reader for this token */ 415 }; 416 417 struct Fts3Phrase { 418 /* Cache of doclist for this phrase. */ 419 Fts3Doclist doclist; 420 int bIncr; /* True if doclist is loaded incrementally */ 421 int iDoclistToken; 422 423 /* Used by sqlite3Fts3EvalPhrasePoslist() if this is a descendent of an 424 ** OR condition. */ 425 char *pOrPoslist; 426 i64 iOrDocid; 427 428 /* Variables below this point are populated by fts3_expr.c when parsing 429 ** a MATCH expression. Everything above is part of the evaluation phase. 430 */ 431 int nToken; /* Number of tokens in the phrase */ 432 int iColumn; /* Index of column this phrase must match */ 433 Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */ 434 }; 435 436 /* 437 ** A tree of these objects forms the RHS of a MATCH operator. 438 ** 439 ** If Fts3Expr.eType is FTSQUERY_PHRASE and isLoaded is true, then aDoclist 440 ** points to a malloced buffer, size nDoclist bytes, containing the results 441 ** of this phrase query in FTS3 doclist format. As usual, the initial 442 ** "Length" field found in doclists stored on disk is omitted from this 443 ** buffer. 444 ** 445 ** Variable aMI is used only for FTSQUERY_NEAR nodes to store the global 446 ** matchinfo data. If it is not NULL, it points to an array of size nCol*3, 447 ** where nCol is the number of columns in the queried FTS table. The array 448 ** is populated as follows: 449 ** 450 ** aMI[iCol*3 + 0] = Undefined 451 ** aMI[iCol*3 + 1] = Number of occurrences 452 ** aMI[iCol*3 + 2] = Number of rows containing at least one instance 453 ** 454 ** The aMI array is allocated using sqlite3_malloc(). It should be freed 455 ** when the expression node is. 456 */ 457 struct Fts3Expr { 458 int eType; /* One of the FTSQUERY_XXX values defined below */ 459 int nNear; /* Valid if eType==FTSQUERY_NEAR */ 460 Fts3Expr *pParent; /* pParent->pLeft==this or pParent->pRight==this */ 461 Fts3Expr *pLeft; /* Left operand */ 462 Fts3Expr *pRight; /* Right operand */ 463 Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ 464 465 /* The following are used by the fts3_eval.c module. */ 466 sqlite3_int64 iDocid; /* Current docid */ 467 u8 bEof; /* True this expression is at EOF already */ 468 u8 bStart; /* True if iDocid is valid */ 469 u8 bDeferred; /* True if this expression is entirely deferred */ 470 471 /* The following are used by the fts3_snippet.c module. */ 472 int iPhrase; /* Index of this phrase in matchinfo() results */ 473 u32 *aMI; /* See above */ 474 }; 475 476 /* 477 ** Candidate values for Fts3Query.eType. Note that the order of the first 478 ** four values is in order of precedence when parsing expressions. For 479 ** example, the following: 480 ** 481 ** "a OR b AND c NOT d NEAR e" 482 ** 483 ** is equivalent to: 484 ** 485 ** "a OR (b AND (c NOT (d NEAR e)))" 486 */ 487 #define FTSQUERY_NEAR 1 488 #define FTSQUERY_NOT 2 489 #define FTSQUERY_AND 3 490 #define FTSQUERY_OR 4 491 #define FTSQUERY_PHRASE 5 492 493 494 /* fts3_write.c */ 495 int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*); 496 int sqlite3Fts3PendingTermsFlush(Fts3Table *); 497 void sqlite3Fts3PendingTermsClear(Fts3Table *); 498 int sqlite3Fts3Optimize(Fts3Table *); 499 int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64, 500 sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**); 501 int sqlite3Fts3SegReaderPending( 502 Fts3Table*,int,const char*,int,int,Fts3SegReader**); 503 void sqlite3Fts3SegReaderFree(Fts3SegReader *); 504 int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **); 505 int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*); 506 507 int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **); 508 int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **); 509 510 #ifndef SQLITE_DISABLE_FTS4_DEFERRED 511 void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *); 512 int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int); 513 int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *); 514 void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *); 515 int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *); 516 #else 517 # define sqlite3Fts3FreeDeferredTokens(x) 518 # define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK 519 # define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK 520 # define sqlite3Fts3FreeDeferredDoclists(x) 521 # define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK 522 #endif 523 524 void sqlite3Fts3SegmentsClose(Fts3Table *); 525 int sqlite3Fts3MaxLevel(Fts3Table *, int *); 526 527 /* Special values interpreted by sqlite3SegReaderCursor() */ 528 #define FTS3_SEGCURSOR_PENDING -1 529 #define FTS3_SEGCURSOR_ALL -2 530 531 int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*); 532 int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *); 533 void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *); 534 535 int sqlite3Fts3SegReaderCursor(Fts3Table *, 536 int, int, int, const char *, int, int, int, Fts3MultiSegReader *); 537 538 /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */ 539 #define FTS3_SEGMENT_REQUIRE_POS 0x00000001 540 #define FTS3_SEGMENT_IGNORE_EMPTY 0x00000002 541 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004 542 #define FTS3_SEGMENT_PREFIX 0x00000008 543 #define FTS3_SEGMENT_SCAN 0x00000010 544 #define FTS3_SEGMENT_FIRST 0x00000020 545 546 /* Type passed as 4th argument to SegmentReaderIterate() */ 547 struct Fts3SegFilter { 548 const char *zTerm; 549 int nTerm; 550 int iCol; 551 int flags; 552 }; 553 554 struct Fts3MultiSegReader { 555 /* Used internally by sqlite3Fts3SegReaderXXX() calls */ 556 Fts3SegReader **apSegment; /* Array of Fts3SegReader objects */ 557 int nSegment; /* Size of apSegment array */ 558 int nAdvance; /* How many seg-readers to advance */ 559 Fts3SegFilter *pFilter; /* Pointer to filter object */ 560 char *aBuffer; /* Buffer to merge doclists in */ 561 i64 nBuffer; /* Allocated size of aBuffer[] in bytes */ 562 563 int iColFilter; /* If >=0, filter for this column */ 564 int bRestart; 565 566 /* Used by fts3.c only. */ 567 int nCost; /* Cost of running iterator */ 568 int bLookup; /* True if a lookup of a single entry. */ 569 570 /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */ 571 char *zTerm; /* Pointer to term buffer */ 572 int nTerm; /* Size of zTerm in bytes */ 573 char *aDoclist; /* Pointer to doclist buffer */ 574 int nDoclist; /* Size of aDoclist[] in bytes */ 575 }; 576 577 int sqlite3Fts3Incrmerge(Fts3Table*,int,int); 578 579 #define fts3GetVarint32(p, piVal) ( \ 580 (*(u8*)(p)&0x80) ? sqlite3Fts3GetVarint32(p, piVal) : (*piVal=*(u8*)(p), 1) \ 581 ) 582 583 /* fts3.c */ 584 void sqlite3Fts3ErrMsg(char**,const char*,...); 585 int sqlite3Fts3PutVarint(char *, sqlite3_int64); 586 int sqlite3Fts3GetVarint(const char *, sqlite_int64 *); 587 int sqlite3Fts3GetVarintU(const char *, sqlite_uint64 *); 588 int sqlite3Fts3GetVarintBounded(const char*,const char*,sqlite3_int64*); 589 int sqlite3Fts3GetVarint32(const char *, int *); 590 int sqlite3Fts3VarintLen(sqlite3_uint64); 591 void sqlite3Fts3Dequote(char *); 592 void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); 593 int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); 594 int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); 595 void sqlite3Fts3CreateStatTable(int*, Fts3Table*); 596 int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc); 597 int sqlite3Fts3ReadInt(const char *z, int *pnOut); 598 599 /* fts3_tokenizer.c */ 600 const char *sqlite3Fts3NextToken(const char *, int *); 601 int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *); 602 int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *, 603 sqlite3_tokenizer **, char ** 604 ); 605 int sqlite3Fts3IsIdChar(char); 606 607 /* fts3_snippet.c */ 608 void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); 609 void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, 610 const char *, const char *, int, int 611 ); 612 void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); 613 void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p); 614 615 /* fts3_expr.c */ 616 int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, 617 char **, int, int, int, const char *, int, Fts3Expr **, char ** 618 ); 619 void sqlite3Fts3ExprFree(Fts3Expr *); 620 #ifdef SQLITE_TEST 621 int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash*); 622 int sqlite3Fts3InitTerm(sqlite3 *db); 623 #endif 624 void *sqlite3Fts3MallocZero(i64 nByte); 625 626 int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int, 627 sqlite3_tokenizer_cursor ** 628 ); 629 630 /* fts3_aux.c */ 631 int sqlite3Fts3InitAux(sqlite3 *db); 632 633 void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *); 634 635 int sqlite3Fts3MsrIncrStart( 636 Fts3Table*, Fts3MultiSegReader*, int, const char*, int); 637 int sqlite3Fts3MsrIncrNext( 638 Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *); 639 int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **); 640 int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *); 641 int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr); 642 643 /* fts3_tokenize_vtab.c */ 644 int sqlite3Fts3InitTok(sqlite3*, Fts3Hash *, void(*xDestroy)(void*)); 645 646 /* fts3_unicode2.c (functions generated by parsing unicode text files) */ 647 #ifndef SQLITE_DISABLE_FTS3_UNICODE 648 int sqlite3FtsUnicodeFold(int, int); 649 int sqlite3FtsUnicodeIsalnum(int); 650 int sqlite3FtsUnicodeIsdiacritic(int); 651 #endif 652 653 #endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */ 654 #endif /* _FTSINT_H */ 655