xref: /sqlite-3.40.0/test/fts4unicode.test (revision ab322bd2)
1# 2012 May 25
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#*************************************************************************
11#
12# The tests in this file focus on testing the "unicode" FTS tokenizer.
13#
14
# Work out the directory holding this script and source tester.tcl from it.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# The whole file is skipped when the fts3 capability check fails.
ifcapable !fts3 {
  finish_test
  return
}

# Presumably consumed by tester.tcl as the name prefix for the tests
# below - confirm against tester.tcl.
set ::testprefix fts4unicode
19
# Run a single "unicode61" tokenizer test case.
#
#   tn     test name passed through to do_execsql_test
#   input  text given to fts3_tokenizer_test() as its second argument
#   res    expected result, written as a Tcl list
#
# Single quotes in $input are doubled so the text can be embedded in an
# SQL string literal.  $res is expanded and re-built with [list], which
# discards any layout whitespace in the caller's literal, and is then
# wrapped in an outer one-element list before being handed to
# do_execsql_test.  The call is made with [uplevel] so that it runs in
# the caller's scope.
proc do_unicode_token_test {tn input res} {
  set quoted [string map {' ''} $input]
  set sql "
    SELECT fts3_tokenizer_test('unicode61', '$quoted');
  "
  set expected [list [list {*}$res]]
  uplevel [list do_execsql_test $tn $sql $expected]
}
26
# Expected values are flat lists of triples.  Judging by case 1.0 each
# triple is: token position, case-folded token, token as it appeared in
# the input.
do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}

# Non-ASCII letters, on their own (1.1) and embedded in a word (1.2).
# They are spelled with \u escapes so the cases do not depend on the
# encoding used to read this script.
# NOTE(review): the literal characters previously on these two lines were
# mis-encoded and unreadable.  A/O/U WITH DIAERESIS (upper-case input,
# lower-case folded form) is a reconstruction - confirm against upstream.
do_unicode_token_test 1.1 "\u00C4 \u00D6 \u00DC" \
    "0 \u00E4 \u00C4 1 \u00F6 \u00D6 2 \u00FC \u00DC"
do_unicode_token_test 1.2 "x\u00C4x x\u00D6x x\u00DCx" \
    "0 x\u00E4x x\u00C4x 1 x\u00F6x x\u00D6x 2 x\u00FCx x\u00DCx"

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
# The expected folded token in 1.4 was a mis-encoded literal; it is
# written as \u00DF here, the same value case 1.5 expects.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0 \u00DF \u1E9E"
do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"

do_unicode_token_test 1.6 "The quick brown fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}
# Same words as 1.6, but separated by U+00BF, U+224E and U+2263 instead
# of spaces; the expected tokens are identical.
do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}
42
#-------------------------------------------------------------------------
# Sample documents for the 2.* tests.  Each element is one document; the
# surrounding newlines and indentation are part of the element text and
# are collapsed later by mapdoc.
#
set docs [list]
lappend docs {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
}
lappend docs {
  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
}
lappend docs {
  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
}
lappend docs {
  Added the sqlite3_db_readonly() interface.
}
lappend docs {
  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
  ability to add new PRAGMA statements or to override built-in PRAGMAs.
}
lappend docs {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}
lappend docs {
  Added support for the FTS4 languageid option.
}
lappend docs {
  Documented support for the FTS4 content option. This feature has actually
  been in the code since version 3.7.9 but is only now considered to be
  officially supported.
}
lappend docs {
  Pending statements no longer block ROLLBACK. Instead, the pending statement
  will return SQLITE_ABORT upon next access after the ROLLBACK.
}
lappend docs {
  Improvements to the handling of CSV inputs in the command-line shell
}
lappend docs {
  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
  incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
  connected by OR.
}
76
# map(<letter>) holds a two-element list: LATIN CAPITAL LETTER <letter>
# WITH DIAERESIS followed by LATIN SMALL LETTER <letter> WITH DIAERESIS.
array set map [list \
  a [list "\u00C4" "\u00E4"] \
  e [list "\u00CB" "\u00EB"] \
  i [list "\u00CF" "\u00EF"] \
  o [list "\u00D6" "\u00F6"] \
  u [list "\u00DC" "\u00FC"] \
  y [list "\u0178" "\u00FF"] \
  h [list "\u1E26" "\u1E27"] \
  w [list "\u1E84" "\u1E85"] \
  x [list "\u1E8C" "\u1E8D"] \
]

# Flatten the array into the key/value list that [string map] expects:
# the upper-case ASCII letter maps to the first element of its pair, the
# lower-case ASCII letter to the second.
foreach k [array names map] {
  lappend mappings \
      [string toupper $k] [lindex $map($k) 0] \
      $k                  [lindex $map($k) 1]
}
# Normalise a document and apply the letter substitutions.
#
# Every run of whitespace in $doc is collapsed to a single space, leading
# and trailing whitespace is trimmed, and the result is passed through
# [string map] using the global list ::mappings.  Returns the mapped text.
proc mapdoc {doc} {
  set collapsed [regsub -all {[[:space:]]+} $doc " "]
  set trimmed [string trim $collapsed]
  return [string map $::mappings $trimmed]
}
94
# 2.0: create an FTS4 table that uses the unicode61 tokenizer and insert
# every sample document after it has been run through mapdoc.  The body
# ends with the foreach, so the expected result is the empty string.
do_test 2.0 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); }
  foreach rawdoc $docs {
    set d [mapdoc $rawdoc]
    execsql { INSERT INTO t2 VALUES($d) }
  }
} {}

# 2.1: a MATCH on the mapped form of "row" is expected to return exactly
# one document, the mapped form of the text given below.
do_test 2.1 {
  set q [mapdoc row]
  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}]]
110
# 2.2 - 2.6: snippet() output.  Each row of the table below is
#   <test number>  <query text>  <expected snippet>
# Both the query and the expected snippet go through mapdoc before use,
# so they are compared in their mapped, whitespace-collapsed form.
# The queries differ from the stored text only in letter case (or use a
# prefix, "lang*"), while the expected snippets keep the stored spelling.
foreach {id term want} {
  2 "row" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  3 "ROW" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  4 "rollback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  5 "rOllback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  6 "lang*" {
     Added support for the FTS4 [languageid] option.
  }
} {
  do_test 2.$id {
    set q [mapdoc $term]
    execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q }
  } [list [mapdoc $want]]
}

# End of the test script.
finish_test
139
140