# 2012 May 25
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
# This is a modified copy of FTS4 test file "fts4_unicode.test".
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode2

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

proc do_unicode_token_test {tn input res} {
  uplevel [list do_test $tn [list \
      sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
  ] [list {*}$res]]
}

proc do_unicode_token_test2 {tn input res} {
  uplevel [list do_test $tn [list \
      sqlite3_fts5_tokenize -subst db "unicode61" $input
  ] [list {*}$res]]
}

proc do_unicode_token_test3 {tn args} {
  set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
  set input [lindex $args end-1]
  set res [lindex $args end]
  uplevel [list do_test $tn [list \
      sqlite3_fts5_tokenize -subst db $tokenizer $input
  ] [list {*}$res]]
}

do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}

do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
    "\uE4 \uC4 \uF6 \uD6 \uFC \uDC"

do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
    "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"

do_unicode_token_test 1.5 "The quick brown fox" {
  the The quick quick brown brown fox fox
}
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
  the The quick quick brown brown fox fox
}

do_unicode_token_test2 1.7 {a B c D} {a a b B c c d D}
do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"

do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
    "xax x\uC4x xox x\uD6x xux x\uDCx"

# Check that diacritics are removed if remove_diacritics=1 is specified.
# And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"

# Title-case mappings work.
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"

do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
    "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"

do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
    "abc abc def def"

#-------------------------------------------------------------------------
#
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
  Added the sqlite3_db_readonly() interface.
} {
  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
  ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
} {
  Added support for the FTS4 languageid option.
} {
  Documented support for the FTS4 content option. This feature has actually
  been in the code since version 3.7.9 but is only now considered to be
  officially supported.
} {
  Pending statements no longer block ROLLBACK. Instead, the pending statement
  will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
  Improvements to the handling of CSV inputs in the command-line shell
} {
  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
  incorrectly converted into an INNER JOIN if the WHERE clause contains
  indexable terms connected by OR.
}]

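# The mapping built below substitutes a letter-with-diaeresis for each of
# the ASCII characters a, e, i, o, u, y, h, w and x (upper and lower case).
# Both the documents and the queries in this section are passed through the
# mapping, so these tests check that unicode61's default diacritic removal
# folds the accented forms back to their ASCII equivalents. For example
# (illustrative only, not run as part of the suite), once the mappings
# below exist:
#
#   mapdoc "row"   ;# returns "r\u00F6\u1E85" (o and w gain a diaeresis)
#
# which the unicode61 tokenizer then indexes as the plain token "row".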
set map(a) [list "\u00C4" "\u00E4"]  ;# LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"]  ;# LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"]  ;# LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"]  ;# LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"]  ;# LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"]  ;# LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"]  ;# LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"]  ;# LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"]  ;# LATIN LETTER X WITH DIAERESIS
foreach k [array names map] {
  lappend mappings [string toupper $k] [lindex $map($k) 0]
  lappend mappings $k [lindex $map($k) 1]
}
proc mapdoc {doc} {
  set doc [regsub -all {[[:space:]]+} $doc " "]
  string map $::mappings [string trim $doc]
}

do_test 2.0 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
  foreach doc $docs {
    set d [mapdoc $doc]
    execsql { INSERT INTO t2 VALUES($d) }
  }
} {}

do_test 2.1 {
  set q [mapdoc "row"]
  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}]]

foreach {tn query snippet} {
  2 "row" {
    ...returns the value of y on the same [row] that contains
    the maximum x value.
  }
  3 "ROW" {
    ...returns the value of y on the same [row] that contains
    the maximum x value.
  }
  4 "rollback" {
    Pending statements no longer block [ROLLBACK]. Instead, the pending
    statement will return SQLITE_ABORT upon...
  }
  5 "rOllback" {
    Pending statements no longer block [ROLLBACK]. Instead, the pending
    statement will return SQLITE_ABORT upon...
  }
  6 "lang*" {
    Added support for the FTS4 [languageid] option.
  }
} {
  do_test 2.$tn {
    set q [mapdoc $query]
    execsql {
      SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
    }
  } [list [mapdoc $snippet]]
}

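# A note on the snippet() arguments used above: the second argument selects
# the column to extract the snippet from (-1 means any column), followed by
# the text inserted before and after each phrase match ('[' and ']'), the
# ellipsis string used where the document text is truncated ('...'), and
# the maximum number of tokens in the snippet (15).
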
#-------------------------------------------------------------------------
# Make sure the unicode61 tokenizer does not crash if it is passed a
# NULL pointer.
reset_db
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
  INSERT INTO t1 VALUES(NULL, 'a b c');
}

do_execsql_test 3.2 {
  SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}

do_execsql_test 3.3 {
  BEGIN;
  DELETE FROM t1;
  INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 VALUES('a b c', NULL);
  INSERT INTO t1 VALUES('a x c', NULL);
  COMMIT;
}

do_execsql_test 3.4 {
  SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}

#-------------------------------------------------------------------------
#
reset_db

do_test 4.1 {
  set a "abc\uFFFEdef"
  set b "abc\uD800def"
  set c "\uFFFEdef"
  set d "\uD800def"
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }

  execsql "CREATE VIRTUAL TABLE t8 USING fts5(
      a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
  )"
} {}

do_test 4.2 {
  set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
  set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
  set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

do_test 4.3 {
  set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
  set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
  set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
  set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

do_test 4.4 {
  sqlite3_exec_hex db {
    CREATE VIRTUAL TABLE t9 USING fts5(a, b,
      tokenize="unicode61 separators '%C09004'"
    );
    INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
  }
} {0 {}}


#-------------------------------------------------------------------------

do_unicode_token_test3 5.1 {tokenchars {}} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3 sqlite3
  reset reset
  sqlite3 sqlite3
  column column
  int int
}

do_unicode_token_test3 5.2 {tokenchars _} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
}

do_unicode_token_test3 5.3 {separators xyz} {
  Laotianxhorseyrunszfast
} {
  laotian Laotian
  horse horse
  runs runs
  fast fast
}

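# The same input again, but with x, y and z configured as extra token
# characters rather than separators: the whole string is kept as a single
# token (case-folded, with the original text alongside it).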
do_unicode_token_test3 5.4 {tokenchars xyz} {
  Laotianxhorseyrunszfast
} {
  laotianxhorseyrunszfast Laotianxhorseyrunszfast
}

do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
  sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
  honda_phantom honda_phantom
}

do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
  abc abc def def
}

do_unicode_token_test3 5.7 \
    "tokenchars \u2444\u2445" \
    "separators \u05D0\u05D1\u05D2" \
    "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
    [list \
        \u2444fre\u2445sh \u2444fre\u2445sh \
        water water \
        fish fish \
        \u2445timer \u2445timer \
    ]

# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
    "hello\u0301world \u0301helloworld" \
    "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.9 "tokenchars \u0301" \
    "hello\u0301world \u0301helloworld" \
    "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.10 "separators \u0301" \
    "remove_diacritics 0" \
    "hello\u0301world \u0301helloworld" \
    "hello\u0301world hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.11 "tokenchars \u0301" \
    "remove_diacritics 0" \
    "hello\u0301world \u0301helloworld" \
    "hello\u0301world hello\u0301world helloworld helloworld"

#-------------------------------------------------------------------------

proc do_tokenize {tokenizer txt} {
  set res [list]
  foreach {b c} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
    lappend res $b
  }
  set res
}

# Argument $lCp must be a list of codepoints (integers) that correspond
# to whitespace characters. This command creates a string $W from the
# codepoints, then tokenizes "${W}hello${W}world${W}" using tokenizer
# $tokenizer. The test passes if the tokenizer successfully extracts the
# two 5-character tokens.
#
proc do_isspace_test {tn tokenizer lCp} {
  set whitespace [format [string repeat %c [llength $lCp]] {*}$lCp]
  set txt "${whitespace}hello${whitespace}world${whitespace}"
  uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
}

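# For example (illustrative only, not run as part of the suite):
#
#   do_isspace_test 6.x unicode61 {32 160}
#
# builds the string " \u00A0hello \u00A0world \u00A0" (a space and a
# no-break space around each word) and passes if tokenizing it yields
# exactly {hello world}.
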
set tokenizers [list unicode61]
#ifcapable icu { lappend tokenizers icu }

# Some tests to check that the tokenizers can identify white-space
# codepoints. All codepoints tested below are of type "Zs" in the
# UnicodeData.txt file.
foreach T $tokenizers {
  do_isspace_test 6.$T.1  $T 32
  do_isspace_test 6.$T.2  $T 160
  do_isspace_test 6.$T.3  $T 5760
  do_isspace_test 6.$T.4  $T 6158
  do_isspace_test 6.$T.5  $T 8192
  do_isspace_test 6.$T.6  $T 8193
  do_isspace_test 6.$T.7  $T 8194
  do_isspace_test 6.$T.8  $T 8195
  do_isspace_test 6.$T.9  $T 8196
  do_isspace_test 6.$T.10 $T 8197
  do_isspace_test 6.$T.11 $T 8198
  do_isspace_test 6.$T.12 $T 8199
  do_isspace_test 6.$T.13 $T 8200
  do_isspace_test 6.$T.14 $T 8201
  do_isspace_test 6.$T.15 $T 8202
  do_isspace_test 6.$T.16 $T 8239
  do_isspace_test 6.$T.17 $T 8287
  do_isspace_test 6.$T.18 $T 12288

  do_isspace_test 6.$T.19 $T {32 160 5760 6158}
  do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}
  do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}
  do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}
  do_isspace_test 6.$T.23 $T {8287 12288}
}


#-------------------------------------------------------------------------
# Test that the private use ranges are treated as alphanumeric.
#
foreach {tn1 c} {
  1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
  foreach {tn2 config res} {
    1 ""              "hello*world hello*world"
    2 "separators *"  "hello hello world world"
  } {
    set config [string map [list * $c] $config]
    set input  [string map [list * $c] "hello*world"]
    set output [string map [list * $c] $res]
    do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
  }
}

#-------------------------------------------------------------------------
# Cursory test of remove_diacritics=0.
#
# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
#
do_execsql_test 8.1.1 "
  CREATE VIRTUAL TABLE t3 USING fts5(
    content, tokenize='unicode61 remove_diacritics 1'
  );
  INSERT INTO t3 VALUES('o');
  INSERT INTO t3 VALUES('a');
  INSERT INTO t3 VALUES('O');
  INSERT INTO t3 VALUES('A');
  INSERT INTO t3 VALUES('\xD6');
  INSERT INTO t3 VALUES('\xC4');
  INSERT INTO t3 VALUES('\xF6');
  INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}
do_execsql_test 8.2.1 {
  CREATE VIRTUAL TABLE t4 USING fts5(
    content, tokenize='unicode61 remove_diacritics 0'
  );
  INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}

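# Summary of the results above: with remove_diacritics=1 (table t3) the
# diaeresis is stripped during tokenization, so the accented rows 5-8
# ('\xD6', '\xC4', '\xF6', '\xE4') index as plain 'o' and 'a' and match the
# queries. With remove_diacritics=0 (table t4) the accented characters
# index as distinct tokens, so only the unaccented rows 1-4 match.
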
#-------------------------------------------------------------------------
#
if 0 {
foreach {tn sql} {
  1 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
    CREATE VIRTUAL TABLE t6 USING fts4(
        tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
  }
  2 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
  }
  3 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
  }
  4 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
  }
} {
  do_execsql_test 9.$tn.0 {
    DROP TABLE IF EXISTS t5;
    DROP TABLE IF EXISTS t5aux;
    DROP TABLE IF EXISTS t6;
    DROP TABLE IF EXISTS t6aux;
    DROP TABLE IF EXISTS t7;
    DROP TABLE IF EXISTS t7aux;
  }
  do_execsql_test 9.$tn.1 $sql

  do_execsql_test 9.$tn.2 {
    CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
    INSERT INTO t5 VALUES('one two three/four.five.six');
    SELECT * FROM t5aux;
  } {
    four.five.six * 1 1 four.five.six 0 1 1
    {one two three} * 1 1 {one two three} 0 1 1
  }

  do_execsql_test 9.$tn.3 {
    CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
    INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
    SELECT * FROM t6aux;
  } {
    {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1
    {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
  }

  do_execsql_test 9.$tn.4 {
    CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
    INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
    SELECT * FROM t7aux;
  } {
    aleph * 1 1 aleph 0 1 1
    beth * 1 1 beth 0 1 1
    gimel * 1 1 gimel 0 1 1
  }
}

# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
    "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
    "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
  );

  INSERT INTO t1 VALUES('oneatwoxthreeyfour');
  INSERT INTO t1 VALUES('a.single=word');
  CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
  SELECT * FROM t1aux;
} {
  .single=word * 1 1 .single=word 0 1 1
  four * 1 1 four 0 1 1
  one * 1 1 one 0 1 1
  three * 1 1 three 0 1 1
  two * 1 1 two 0 1 1
}

# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
  DROP TABLE IF EXISTS t2;
  CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
  INSERT INTO t2 VALUES('oneatwoBthree');
  INSERT INTO t2 VALUES('onebtwoAthree');
  CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
  SELECT * FROM t2aux;
} {
  one * 1 1 one 0 1 1
  onebtwoathree * 1 1 onebtwoathree 0 1 1
  three * 1 1 three 0 1 1
  two * 1 1 two 0 1 1
}

# Test that the tokenchars and separators options work with the
# fts3tokenize table.
#
do_execsql_test 11.1 {
  CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
    "unicode61", "tokenchars=@.", "separators=1234567890"
  );
  SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
} {
  berlin@street sydney.road
}

}

finish_test