1# 2012 May 25
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#*************************************************************************
11#
12# The tests in this file focus on testing the "unicode" FTS tokenizer.
13#
14# This is a modified copy of FTS4 test file "fts4_unicode.test".
15#
16
# Load the common FTS5 test infrastructure.
source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5unicode2

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}
25
proc do_unicode_token_test {tn input res} {
  # Tokenize $input using "unicode61" with diacritic removal disabled,
  # and check that the resulting (token, text) pairs match $res.
  set script [list \
      sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input]
  uplevel [list do_test $tn $script [list {*}$res]]
}
31
proc do_unicode_token_test2 {tn input res} {
  # As do_unicode_token_test, but with the default tokenizer options
  # (i.e. remove_diacritics enabled).
  set script [list sqlite3_fts5_tokenize -subst db "unicode61" $input]
  uplevel [list do_test $tn $script [list {*}$res]]
}
37
proc do_unicode_token_test3 {tn args} {
  # The final two elements of $args are the input text and the expected
  # result; any preceding elements are extra tokenizer options appended
  # to the "unicode61" spec.
  set res       [lindex $args end]
  set input     [lindex $args end-1]
  set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
  uplevel [list do_test $tn \
      [list sqlite3_fts5_tokenize -subst db $tokenizer $input] \
      [list {*}$res]]
}
46
# Basic case-folding: each token is reported as (folded-term, original-text).
do_unicode_token_test 1.0 {a B c D} {a a b B c c d D}

do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
    "\uE4 \uC4 \uF6 \uD6 \uFC \uDC"

do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
    "x\uE4x x\uC4x x\uF6x x\uD6x x\uFCx x\uDCx"

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
do_unicode_token_test 1.3 "\uDF" "\uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "\uDF \u1E9E"

do_unicode_token_test 1.5 "The quick brown fox" {
  the The quick quick brown brown fox fox
}
# Non-alphanumeric codepoints act as token separators.
do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
  the The quick quick brown brown fox fox
}

# Same inputs through the default tokenizer, which removes diacritics
# from the folded terms.
do_unicode_token_test2 1.7  {a B c D} {a a b B c c d D}
do_unicode_token_test2 1.8  "\uC4 \uD6 \uDC" "a \uC4 o \uD6 u \uDC"

do_unicode_token_test2 1.9  "x\uC4x x\uD6x x\uDCx" \
    "xax x\uC4x xox x\uD6x xux x\uDCx"

# Check that diacritics are removed if remove_diacritics=1 is specified
# (the default). And that they do not break tokens.
do_unicode_token_test2 1.10 "xx\u0301xx" "xxxx xx\u301xx"

# Title-case mappings work
do_unicode_token_test 1.11 "\u01c5" "\u01c6 \u01c5"

do_unicode_token_test 1.12 "\u00C1abc\u00C2 \u00D1def\u00C3" \
    "\u00E1abc\u00E2 \u00C1abc\u00C2 \u00F1def\u00E3 \u00D1def\u00C3"

# Currency-symbol codepoints act as separators.
do_unicode_token_test 1.13 "\u00A2abc\u00A3 \u00A4def\u00A5" \
    "abc abc def def"
84
85#-------------------------------------------------------------------------
86#
# A list of documents (release-note paragraphs) used to populate the
# fts5 tables in the tests below.
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
  Added the sqlite3_db_readonly() interface.
} {
  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
  ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
} {
  Added support for the FTS4 languageid option.
} {
  Documented support for the FTS4 content option. This feature has actually
  been in the code since version 3.7.9 but is only now considered to be
  officially supported.
} {
  Pending statements no longer block ROLLBACK. Instead, the pending statement
  will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
  Improvements to the handling of CSV inputs in the command-line shell
} {
  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
  incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
  connected by OR.
}]

# Map from each plain letter to its {upper-case lower-case} diaeresis variant.
set map(a) [list "\u00C4" "\u00E4"]  ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"]  ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"]  ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"]  ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"]  ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"]  ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"]  ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"]  ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"]  ; # LATIN LETTER X WITH DIAERESIS
# Build $mappings, a [string map] substitution list replacing each plain
# letter (in both cases) with its diaeresis form.
foreach k [array names map] {
  lappend mappings [string toupper $k] [lindex $map($k) 0]
  lappend mappings $k [lindex $map($k) 1]
}
proc mapdoc {doc} {
  # Collapse runs of whitespace to single spaces, trim the result, then
  # apply the global diaeresis substitutions built above.
  set flattened [regsub -all {[[:space:]]+} $doc " "]
  return [string map $::mappings [string trim $flattened]]
}
136
# Populate table t2 with the diaeresis-mapped version of each document.
do_test 2.0 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
  foreach doc $docs {
    set d [mapdoc $doc]
    execsql { INSERT INTO t2 VALUES($d) }
  }
} {}

# A mapped query term matches the corresponding mapped document.
do_test 2.1 {
  set q [mapdoc "row"]
  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}]]

# snippet() output for exact, case-folded and prefix queries against the
# mapped documents.
foreach {tn query snippet} {
  2 "row" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  3 "ROW" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  4 "rollback" {
     Pending statements no longer block [ROLLBACK]. Instead, the pending
     statement will return SQLITE_ABORT upon...
  }
  5 "rOllback" {
     Pending statements no longer block [ROLLBACK]. Instead, the pending
     statement will return SQLITE_ABORT upon...
  }
  6 "lang*" {
     Added support for the FTS4 [languageid] option.
  }
} {
  do_test 2.$tn {
    set q [mapdoc $query]
    execsql {
      SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
    }
  } [list [mapdoc $snippet]]
}
181
182#-------------------------------------------------------------------------
183# Make sure the unicode61 tokenizer does not crash if it is passed a
184# NULL pointer.
reset_db
# NULL column values must be tolerated by the tokenizer.
do_execsql_test 3.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
  INSERT INTO t1 VALUES(NULL, 'a b c');
}

do_execsql_test 3.2 {
  SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
} {{a [b] c}}

# Build a large index (each INSERT..SELECT doubles the row count) with
# NULLs mixed in, all inside one transaction.
do_execsql_test 3.3 {
  BEGIN;
  DELETE FROM t1;
  INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 SELECT * FROM t1;
  INSERT INTO t1 VALUES('a b c', NULL);
  INSERT INTO t1 VALUES('a x c', NULL);
  COMMIT;
}

do_execsql_test 3.4 {
  SELECT * FROM t1 WHERE t1 MATCH 'a b';
} {{a b c} {}}
223
224#-------------------------------------------------------------------------
225#
reset_db

# Documents and separator options containing the non-character 0xFFFE and
# the (ill-formed in UTF-8) surrogate 0xD800 must not crash the tokenizer.
do_test 4.1 {
  set a "abc\uFFFEdef"
  set b "abc\uD800def"
  set c "\uFFFEdef"
  set d "\uD800def"
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }

  execsql "CREATE VIRTUAL TABLE t8 USING fts5(
      a, b, tokenize=\"unicode61 separators '\uFFFE\uD800\u00BF'\"
  )"
} {}

# Over-long (5/6/7/8 byte) UTF-8-like sequences embedded in a document.
do_test 4.2 {
  set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
  set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
  set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

# The same malformed sequences standing alone (no surrounding letters).
do_test 4.3 {
  set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
  set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
  set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
  set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
  execsql {
    INSERT INTO t1 VALUES($a);
    INSERT INTO t1 VALUES($b);
    INSERT INTO t1 VALUES($c);
    INSERT INTO t1 VALUES($d);
  }
} {}

# Hex-escaped (possibly invalid UTF-8) bytes in both the separators
# option and the inserted document.
do_test 4.4 {
  sqlite3_exec_hex db {
    CREATE VIRTUAL TABLE t9 USING fts5(a, b,
      tokenize="unicode61 separators '%C09004'"
    );
    INSERT INTO t9(a) VALUES('abc%88def %89ghi%90');
  }
} {0 {}}
280
281
282#-------------------------------------------------------------------------
283
# With an empty tokenchars list, '_' remains a separator.
do_unicode_token_test3 5.1 {tokenchars {}} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3 sqlite3
  reset reset
  sqlite3 sqlite3
  column column
  int int
}

# Adding '_' to tokenchars keeps the identifiers whole.
do_unicode_token_test3 5.2 {tokenchars _} {
  sqlite3_reset sqlite3_column_int
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
}

# Extra separators split the text at x/y/z.
do_unicode_token_test3 5.3 {separators xyz} {
  Laotianxhorseyrunszfast
} {
  laotian Laotian
  horse horse
  runs runs
  fast fast
}

do_unicode_token_test3 5.4 {tokenchars xyz} {
  Laotianxhorseyrunszfast
} {
  laotianxhorseyrunszfast Laotianxhorseyrunszfast
}

# tokenchars and separators may be combined.
do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
  sqlite3_resetxsqlite3_column_intyhonda_phantom
} {
  sqlite3_reset sqlite3_reset
  sqlite3_column_int sqlite3_column_int
  honda_phantom honda_phantom
}

do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
  abc abc def def
}

# Non-ASCII codepoints work in both options simultaneously.
do_unicode_token_test3 5.7                             \
  "tokenchars \u2444\u2445"                            \
  "separators \u05D0\u05D1\u05D2"                      \
  "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
  [list                                                \
    \u2444fre\u2445sh \u2444fre\u2445sh              \
    water water                                      \
    fish fish                                        \
    \u2445timer \u2445timer                          \
  ]

# Check that it is not possible to add a standalone diacritic codepoint
# to either separators or tokenchars.
do_unicode_token_test3 5.8 "separators \u0301" \
  "hello\u0301world \u0301helloworld"          \
  "helloworld hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.9 "tokenchars \u0301" \
  "hello\u0301world \u0301helloworld"          \
  "helloworld hello\u0301world helloworld helloworld"

# With remove_diacritics=0 the combining accent survives in the terms.
do_unicode_token_test3 5.10 "separators \u0301" \
  "remove_diacritics 0"                        \
  "hello\u0301world \u0301helloworld"          \
  "hello\u0301world hello\u0301world helloworld helloworld"

do_unicode_token_test3 5.11 "tokenchars \u0301" \
  "remove_diacritics 0"                         \
  "hello\u0301world \u0301helloworld"           \
  "hello\u0301world hello\u0301world helloworld helloworld"
358
359#-------------------------------------------------------------------------
360
proc do_tokenize {tokenizer txt} {
  # Tokenize $txt with $tokenizer and return just the token terms,
  # discarding the accompanying original-text elements.
  set terms [list]
  foreach {term original} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
    lappend terms $term
  }
  return $terms
}
368
369# Argument $lCodepoint must be a list of codepoints (integers) that
370# correspond to whitespace characters. This command creates a string
# $W from the codepoints, then tokenizes "${W}hello${W}world${W}"
372# using tokenizer $tokenizer. The test passes if the tokenizer successfully
373# extracts the two 5 character tokens.
374#
proc do_isspace_test {tn tokenizer lCp} {
  # Build a whitespace run from the codepoints in $lCp, then check that
  # "hello" and "world" are extracted when delimited by that run.
  set ws ""
  foreach cp $lCp {
    append ws [format %c $cp]
  }
  set txt "${ws}hello${ws}world${ws}"
  uplevel [list do_test $tn [list do_tokenize $tokenizer $txt] {hello world}]
}
380
set tokenizers [list unicode61]
#ifcapable icu { lappend tokenizers icu }

# Some tests to check that the tokenizers can identify white-space
# codepoints. All codepoints tested below are of type "Zs" in the
# UnicodeData.txt file.
foreach T $tokenizers {
  do_isspace_test 6.$T.1 $T    32
  do_isspace_test 6.$T.2 $T    160
  do_isspace_test 6.$T.3 $T    5760
  do_isspace_test 6.$T.4 $T    6158
  do_isspace_test 6.$T.5 $T    8192
  do_isspace_test 6.$T.6 $T    8193
  do_isspace_test 6.$T.7 $T    8194
  do_isspace_test 6.$T.8 $T    8195
  do_isspace_test 6.$T.9 $T    8196
  do_isspace_test 6.$T.10 $T    8197
  do_isspace_test 6.$T.11 $T    8198
  do_isspace_test 6.$T.12 $T    8199
  do_isspace_test 6.$T.13 $T    8200
  do_isspace_test 6.$T.14 $T    8201
  do_isspace_test 6.$T.15 $T    8202
  do_isspace_test 6.$T.16 $T    8239
  do_isspace_test 6.$T.17 $T    8287
  do_isspace_test 6.$T.18 $T   12288

  # Multi-codepoint whitespace runs.
  do_isspace_test 6.$T.19 $T   {32 160 5760 6158}
  do_isspace_test 6.$T.20 $T   {8192 8193 8194 8195}
  do_isspace_test 6.$T.21 $T   {8196 8197 8198 8199}
  do_isspace_test 6.$T.22 $T   {8200 8201 8202 8239}
  do_isspace_test 6.$T.23 $T   {8287 12288}
}
413
414
415#-------------------------------------------------------------------------
416# Test that the private use ranges are treated as alphanumeric.
417#
foreach {tn1 c} {
  1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
} {
  foreach {tn2 config res} {
    1 ""             "hello*world hello*world"
    2 "separators *" "hello hello world world"
  } {
    # Substitute the private-use codepoint $c for the '*' placeholder in
    # both the tokenizer config and the test input/expected output.
    set config [string map [list * $c] $config]
    set input  [string map [list * $c] "hello*world"]
    set output [string map [list * $c] $res]
    do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
  }
}
431
432#-------------------------------------------------------------------------
433# Cursory test of remove_diacritics=0.
434#
435# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
436# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
437# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
438# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
439#
# With remove_diacritics=1 (the default), accented and plain vowels index
# to the same term.
do_execsql_test 8.1.1 "
  CREATE VIRTUAL TABLE t3 USING fts5(
    content, tokenize='unicode61 remove_diacritics 1'
  );
  INSERT INTO t3 VALUES('o');
  INSERT INTO t3 VALUES('a');
  INSERT INTO t3 VALUES('O');
  INSERT INTO t3 VALUES('A');
  INSERT INTO t3 VALUES('\xD6');
  INSERT INTO t3 VALUES('\xC4');
  INSERT INTO t3 VALUES('\xF6');
  INSERT INTO t3 VALUES('\xE4');
"
do_execsql_test 8.1.2 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
} {1 3 5 7}
do_execsql_test 8.1.3 {
  SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
} {2 4 6 8}
# With remove_diacritics=0, only the unaccented rows match.
do_execsql_test 8.2.1 {
  CREATE VIRTUAL TABLE t4 USING fts5(
    content, tokenize='unicode61 remove_diacritics 0'
  );
  INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
}
do_execsql_test 8.2.2 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
} {1 3}
do_execsql_test 8.2.3 {
  SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
} {2 4}
471
472#-------------------------------------------------------------------------
473#
# NOTE(review): the following fts4-era tests (sections 9-11) are disabled
# with "if 0" — presumably not yet ported from fts4 to fts5. Left intact.
if 0 {
foreach {tn sql} {
  1 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
    CREATE VIRTUAL TABLE t6 USING fts4(
        tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
  }
  2 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
  }
  3 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
  }
  4 {
    CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
    CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
    CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
  }
} {
  do_execsql_test 9.$tn.0 {
    DROP TABLE IF EXISTS t5;
    DROP TABLE IF EXISTS t5aux;
    DROP TABLE IF EXISTS t6;
    DROP TABLE IF EXISTS t6aux;
    DROP TABLE IF EXISTS t7;
    DROP TABLE IF EXISTS t7aux;
  }
  do_execsql_test 9.$tn.1 $sql

  do_execsql_test 9.$tn.2 {
    CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
    INSERT INTO t5 VALUES('one two three/four.five.six');
    SELECT * FROM t5aux;
  } {
    four.five.six   * 1 1 four.five.six   0 1 1
    {one two three} * 1 1 {one two three} 0 1 1
  }

  do_execsql_test 9.$tn.3 {
    CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
    INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
    SELECT * FROM t6aux;
  } {
    {alpha=beta"gamma}   * 1 1 {alpha=beta"gamma} 0 1 1
    {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
  }

  do_execsql_test 9.$tn.4 {
    CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
    INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
    SELECT * FROM t7aux;
  } {
    aleph * 1 1 aleph 0 1 1
    beth  * 1 1 beth  0 1 1
    gimel * 1 1 gimel 0 1 1
  }
}

# Check that multiple options are handled correctly.
#
do_execsql_test 10.1 {
  DROP TABLE IF EXISTS t1;
  CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
    "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
    "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
  );

  INSERT INTO t1 VALUES('oneatwoxthreeyfour');
  INSERT INTO t1 VALUES('a.single=word');
  CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
  SELECT * FROM t1aux;
} {
  .single=word * 1 1 .single=word 0 1 1
  four         * 1 1 four         0 1 1
  one          * 1 1 one          0 1 1
  three        * 1 1 three        0 1 1
  two          * 1 1 two          0 1 1
}

# Test that case folding happens after tokenization, not before.
#
do_execsql_test 10.2 {
  DROP TABLE IF EXISTS t2;
  CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
  INSERT INTO t2 VALUES('oneatwoBthree');
  INSERT INTO t2 VALUES('onebtwoAthree');
  CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
  SELECT * FROM t2aux;
} {
  one           * 1 1 one           0 1 1
  onebtwoathree * 1 1 onebtwoathree 0 1 1
  three         * 1 1 three         0 1 1
  two           * 1 1 two           0 1 1
}

# Test that the tokenchars and separators options work with the
# fts3tokenize table.
#
do_execsql_test 11.1 {
  CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
    "unicode61", "tokenchars=@.", "separators=1234567890"
  );
  SELECT token FROM ft1 WHERE input = '[email protected]';
} {
  berlin@street sydney.road
}

}
587
588finish_test
589