
Searched refs:tokenizer (Results 1 – 25 of 31) sorted by relevance

/sqlite-3.40.0/ext/fts2/
README.tokenizers
5 the text tokenizer implementation to be used when indexing text
6 by specifying a "tokenizer" clause as part of the CREATE VIRTUAL TABLE
10 <columns ...> [, tokenizer <tokenizer-name> [<tokenizer-args>]]
17 arguments to pass to the selected tokenizer implementation. The
19 tokenizer.
27 Registering a new FTS2 tokenizer is similar to registering a new
35 tokenizer types with a database handle. Instead, the pointer must
41 SELECT fts2_tokenizer(<tokenizer-name>);
44 Where <tokenizer-name> is a string identifying the tokenizer and
47 it is registered as tokenizer <tokenizer-name> and a copy of it
[all …]
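
The fts2 README lines above describe the optional "tokenizer" clause of
CREATE VIRTUAL TABLE. A minimal sketch of how that clause is spelled
(table and column names here are illustrative, not from the source):

  #include <sqlite3.h>

  /* Create an fts2 table that names its tokenizer explicitly.  fts2
  ** spells the keyword "tokenizer"; "simple" and "porter" are the
  ** built-in implementations. */
  static int create_fts2_table(sqlite3 *db){
    return sqlite3_exec(db,
        "CREATE VIRTUAL TABLE pages USING fts2("
        "  title, body, tokenizer porter);",
        0, 0, 0);
  }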
/sqlite-3.40.0/ext/fts3/
README.tokenizers
5 the text tokenizer implementation to be used when indexing text
10 <columns ...> [, tokenize <tokenizer-name> [<tokenizer-args>]]
17 arguments to pass to the selected tokenizer implementation. The
19 tokenizer.
23 FTS3 allows users to provide custom tokenizer implementations. The
27 Registering a new FTS3 tokenizer is similar to registering a new
35 tokenizer types with a database handle. Instead, the pointer must
41 SELECT fts3_tokenizer(<tokenizer-name>);
44 Where <tokenizer-name> is a string identifying the tokenizer and
47 it is registered as tokenizer <tokenizer-name> and a copy of it
[all …]
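
The registration scheme sketched in the matched lines (a pointer passed
through the fts3_tokenizer() SQL function) looks roughly like the
following on the C side. This is a hedged sketch: sqlite3_tokenizer_module
is declared in ext/fts3/fts3_tokenizer.h, and modern SQLite builds
additionally require the two-argument form of fts3_tokenizer() to be
enabled via sqlite3_db_config().

  #include <sqlite3.h>
  #include "fts3_tokenizer.h"   /* sqlite3_tokenizer_module */

  /* Register pModule under the name "mytok" by binding its address as
  ** a blob, which is safer than embedding the pointer in SQL text. */
  static int register_fts3_tokenizer(
    sqlite3 *db,
    const sqlite3_tokenizer_module *pModule
  ){
    sqlite3_stmt *pStmt = 0;
    int rc;

    /* The two-argument fts3_tokenizer() is disabled by default. */
    sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, (int*)0);

    rc = sqlite3_prepare_v2(db,
        "SELECT fts3_tokenizer('mytok', ?)", -1, &pStmt, 0);
    if( rc==SQLITE_OK ){
      sqlite3_bind_blob(pStmt, 1, &pModule, sizeof(pModule), SQLITE_STATIC);
      sqlite3_step(pStmt);
      rc = sqlite3_finalize(pStmt);
    }
    return rc;
  }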
/sqlite-3.40.0/ext/fts5/test/
fts5tokenizer.test
48 } {1 {no such tokenizer: nosuch}}
52 } {1 {error in tokenizer constructor}}
78 " {1 {error in tokenizer constructor}}
138 } {1 {error in tokenizer constructor}}
141 } {1 {error in tokenizer constructor}}
186 } {1 {error in tokenizer constructor}}
189 } {1 {error in tokenizer constructor}}
194 } {1 {error in tokenizer constructor}}
199 } {1 {error in tokenizer constructor}}
202 # Porter tokenizer with very large tokens.
[all …]
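
The error strings checked above are what fts5 reports when a tokenize=
option cannot be resolved, or when a registered tokenizer's xCreate
fails. A minimal reproduction of the first case (names illustrative):

  #include <stdio.h>
  #include <sqlite3.h>

  /* Asking for an unregistered tokenizer fails at CREATE time with
  ** "no such tokenizer: nosuch"; a constructor failure in a registered
  ** tokenizer surfaces as "error in tokenizer constructor" instead. */
  static int demo_bad_tokenizer(sqlite3 *db){
    char *zErr = 0;
    int rc = sqlite3_exec(db,
        "CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize='nosuch');",
        0, 0, &zErr);
    if( rc!=SQLITE_OK ){
      fprintf(stderr, "%s\n", zErr);  /* no such tokenizer: nosuch */
      sqlite3_free(zErr);
    }
    return rc;
  }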
fts5unicode2.test
12 # The tests in this file focus on testing the "unicode" FTS tokenizer.
39 set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
43 sqlite3_fts5_tokenize -subst db $tokenizer $input
183 # Make sure the unicode61 tokenizer does not crash if it is passed a
361 proc do_tokenize {tokenizer txt} {
363 foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
372 # using tokenizer $tokenizer. The test passes if the tokenizer successfully
375 proc do_isspace_test {tn tokenizer lCp} {
378 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
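
The tests above drive unicode61 (fts5's default tokenizer, as
fts5unicode.test below confirms) with extra arguments appended after the
name. Those arguments map onto the documented options of the tokenize=
clause, e.g. (a sketch; table name is illustrative):

  #include <sqlite3.h>

  /* unicode61 with documented options: keep diacritics
  ** (remove_diacritics 0) and treat '-' and '_' as token characters
  ** rather than separators. */
  static int create_unicode61_table(sqlite3 *db){
    return sqlite3_exec(db,
        "CREATE VIRTUAL TABLE t2 USING fts5(x, tokenize = "
        "\"unicode61 remove_diacritics 0 tokenchars '-_'\");",
        0, 0, 0);
  }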
fts5unicode.test
24 proc tokenize_test {tn tokenizer input output} {
27 foreach {z s e} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] {
42 # Check that "unicode61" really is the default tokenizer.
fts5trigram.test
12 # Tests for the fts5 "trigram" tokenizer.
128 } {1 {error in tokenizer constructor}}
131 } {1 {error in tokenizer constructor}}
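
The trigram tokenizer exercised above indexes every three-character
substring of the input, which also lets fts5 accelerate LIKE and GLOB
queries against the table. A sketch showing its one documented option
with the default value written out explicitly:

  #include <sqlite3.h>

  /* With case_sensitive 0 (the default) trigram matching is
  ** case-independent; set it to 1 for exact-case matching. */
  static int create_trigram_table(sqlite3 *db){
    return sqlite3_exec(db,
        "CREATE VIRTUAL TABLE t3 USING fts5(x,"
        "  tokenize=\"trigram case_sensitive 0\");",
        0, 0, 0);
  }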
fts5ea.test
84 # Experiment with a tokenizer that considers " to be a token character.
91 # Experiment with a tokenizer that considers " to be a token character.
fts5fault6.test
105 # OOM in the ASCII tokenizer with very large tokens.
107 # Also the unicode tokenizer.
138 # OOM while initializing a unicode61 tokenizer.
fts5tok2.test
20 # Simple test cases. Using the default (ascii) tokenizer.
fts5synonym.test
41 # Test a tokenizer that supports synonyms by adding extra entries to the
72 # 3.1.*: A tokenizer that declares the very first token to be colocated.
74 # 3.2.*: A tokenizer that reports two identical tokens at the same position.
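
Colocated tokens, the mechanism these synonym tests exercise, are
reported by invoking the xToken callback a second time at the same
position with the FTS5_TOKEN_COLOCATED flag. A skeletal fragment of a
hypothetical xTokenize implementation (the hard-coded tokens stand in
for real analysis of pText):

  #include "fts5.h"   /* Fts5Tokenizer, FTS5_TOKEN_COLOCATED */

  /* Emit "first" and, at the same position, the synonym "1st".  A real
  ** tokenizer would derive both from pText instead of hard-coding. */
  static int synTokenize(
    Fts5Tokenizer *pTok, void *pCtx, int flags,
    const char *pText, int nText,
    int (*xToken)(void*, int, const char*, int, int, int)
  ){
    int rc = xToken(pCtx, 0, "first", 5, 0, 5);
    if( rc==SQLITE_OK ){
      rc = xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 0, 5);
    }
    return rc;
  }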
fts5tok1.test
21 # Simple test cases. Using the default (ascii) tokenizer.
fts5fault9.test
84 # Test OOM injection with the xPhraseFirstColumn() API and a tokenizer
/sqlite-3.40.0/test/
fts3atoken.test
46 # 1: Verify that there is no such fts3 tokenizer as 'blah'.
49 # retrieved value as tokenizer 'blah'.
55 # tokenizer 'blah' (it was not possible in step 1).
64 } {1 {unknown tokenizer: blah}}
163 } {1 {unknown tokenizer: nosuchtokenizer}}
168 # to test the test function as the tokenizer implementations.
262 # Test empty tokenizer names.
266 } {1 {unknown tokenizer: }}
269 } {1 {unknown tokenizer: }}
272 } {1 {unknown tokenizer: }}
[all …]
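
The first steps of fts3atoken.test amount to cloning a built-in module
under a new name entirely in SQL: the one-argument fts3_tokenizer()
returns the pointer registered for 'simple', and the two-argument form
re-registers it. A sketch (requires the two-argument form to be enabled,
as noted earlier):

  #include <sqlite3.h>

  /* Read the module pointer for the built-in "simple" tokenizer and
  ** re-register that same module as "blah". */
  static int clone_simple_tokenizer(sqlite3 *db){
    sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, (int*)0);
    return sqlite3_exec(db,
        "SELECT fts3_tokenizer('blah', fts3_tokenizer('simple'));",
        0, 0, 0);
  }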
fts2token.test
44 # 1: Verify that there is no such fts2 tokenizer as 'blah'.
46 # 2: Query for the built-in tokenizer 'simple'. Insert a copy of the
47 # retrieved value as tokenizer 'blah'.
49 # 3: Test that the value returned for tokenizer 'blah' is now the
53 # tokenizer 'blah' (it was not possible in step 1).
55 # 5: Test that the table created to use tokenizer 'blah' is usable.
61 } {1 {unknown tokenizer: blah}}
94 } {1 {unknown tokenizer: nosuchtokenizer}}
99 # to test the test function as the tokenizer implementations.
120 # Test cases fts2token-4.* test the ICU tokenizer. In practice, this
[all …]
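
Step 5 above ("the table created to use tokenizer 'blah' is usable")
corresponds to naming the alias in a table definition and querying it.
An fts2 variant of the whole round trip, as a hedged sketch (fts2 is
long deprecated and often not compiled in; names are illustrative):

  #include <sqlite3.h>

  /* Clone "simple" as "blah", then create a table that names the alias
  ** and use it for an INSERT and a MATCH query. */
  static int use_cloned_tokenizer(sqlite3 *db){
    int rc = sqlite3_exec(db,
        "SELECT fts2_tokenizer('blah', fts2_tokenizer('simple'));",
        0, 0, 0);
    if( rc==SQLITE_OK ){
      rc = sqlite3_exec(db,
          "CREATE VIRTUAL TABLE t4 USING fts2(content, tokenizer blah);"
          "INSERT INTO t4 VALUES('hello world');"
          "SELECT * FROM t4 WHERE t4 MATCH 'hello';",
          0, 0, 0);
    }
    return rc;
  }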
fts3expr4.test
27 proc test_fts3expr {tokenizer expr} {
28 db one {SELECT fts3_exprtest($tokenizer, $expr, 'a', 'b', 'c')}
50 # is passed to the tokenizer.
fts4unicode.test
12 # The tests in this file focus on testing the "unicode" FTS tokenizer.
174 # Make sure the unicode61 tokenizer does not crash if it is passed a
339 proc do_tokenize {tokenizer txt} {
341 foreach {a b c} [db one {SELECT fts3_tokenizer_test($tokenizer, $txt)}] {
350 # using tokenizer $tokenizer. The test passes if the tokenizer successfully
353 proc do_isspace_test {tn tokenizer lCp} {
356 uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
fts3tok1.test
22 # Simple test cases. Using the default (simple) tokenizer.
106 } {1 {unknown tokenizer: nosuchtokenizer}}
120 } {1 {unknown tokenizer}}
tokenize.test
12 # focus of this script testing the tokenizer
fts1k.test
37 } "unknown tokenizer: \x80"
fts2l.test
37 } "unknown tokenizer: \x80"
fts3al.test
37 } "unknown tokenizer: \x80"
fts3malloc.test
37 # OOM in tokenizer code have been fixed.
61 } {unknown tokenizer: unknown}
fts4langid.test
42 # 4.* - Test that if one is provided, the tokenizer xLanguage method
43 # is called to configure the tokenizer before tokenizing query
/sqlite-3.40.0/ext/fts5/
fts5_tcl.c
692 fts5_tokenizer tokenizer; in f5tTokenize() local
723 rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer); in f5tTokenize()
729 rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok); in f5tTokenize()
740 rc = tokenizer.xTokenize( in f5tTokenize()
743 tokenizer.xDelete(pTok); in f5tTokenize()
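
The f5tTokenize() lines above follow the canonical fts5_api sequence:
look the tokenizer up, create an instance, tokenize, delete. Condensed
into one function (obtaining pApi and the token callback are assumed to
happen elsewhere):

  #include "fts5.h"

  /* Tokenize one document with a named tokenizer: find the module,
  ** create an instance, run it over the text, delete the instance. */
  static int tokenize_once(
    fts5_api *pApi, void *pCtx,
    int (*xToken)(void*, int, const char*, int, int, int)
  ){
    fts5_tokenizer tok;
    void *pUserdata = 0;
    Fts5Tokenizer *pInst = 0;
    int rc = pApi->xFindTokenizer(pApi, "unicode61", &pUserdata, &tok);
    if( rc==SQLITE_OK ){
      rc = tok.xCreate(pUserdata, 0, 0, &pInst);
    }
    if( rc==SQLITE_OK ){
      rc = tok.xTokenize(pInst, pCtx, FTS5_TOKENIZE_DOCUMENT,
                         "hello world", 11, xToken);
      tok.xDelete(pInst);
    }
    return rc;
  }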
fts5_tokenize.c
552 fts5_tokenizer tokenizer; /* Parent tokenizer module */ member
564 p->tokenizer.xDelete(p->pTokenizer); in fts5PorterDelete()
591 rc = pApi->xFindTokenizer(pApi, zBase, &pUserdata, &pRet->tokenizer); in fts5PorterCreate()
598 rc = pRet->tokenizer.xCreate(pUserdata, azArg2, nArg2, &pRet->pTokenizer); in fts5PorterCreate()
1256 return p->tokenizer.xTokenize( in fts5PorterTokenize()
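
The porter tokenizer matched above is a wrapper: it stores its parent's
fts5_tokenizer methods together with the parent instance, stems whatever
the parent emits, and tears the parent down in its own destructor. A
skeletal version of that shape (stemming logic omitted):

  #include "fts5.h"
  #include <sqlite3.h>

  typedef struct Wrapper {
    fts5_tokenizer tokenizer;    /* Parent tokenizer module */
    Fts5Tokenizer *pTokenizer;   /* Parent tokenizer instance */
  } Wrapper;

  /* Mirrors fts5PorterDelete(): free the parent instance first, then
  ** the wrapper itself. */
  static void wrapperDelete(Fts5Tokenizer *pTok){
    Wrapper *p = (Wrapper*)pTok;
    if( p->pTokenizer ) p->tokenizer.xDelete(p->pTokenizer);
    sqlite3_free(p);
  }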
