1# 2020 September 30
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#*************************************************************************
11#
12# Tests for the fts5 "trigram" tokenizer.
13#
14
# Harness setup: load fts5_common.tcl from the directory containing this
# script, stop the script immediately when the fts5 capability is absent,
# and set ::testprefix for this file (the same variable is later passed to
# foreach_detail_mode in section 5).
15source [file join [file dirname [info script]] fts5_common.tcl]
16ifcapable !fts5 { finish_test ; return }
17set ::testprefix fts5trigram
18
# Test 1.0: create fts5 table t1 with the trigram tokenizer and no tokenizer
# options, then insert two rows: one ASCII string and one Thai string.
19do_execsql_test 1.0 {
20  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize=trigram);
21  INSERT INTO t1 VALUES('abcdefghijklm');
22  INSERT INTO t1 VALUES('กรุงเทพมหานคร');
23}
24
# Tests 1.1.*: each (tn, s, res) triple queries t1 with $s through the
# t1(...) form and compares the highlight() output for column 0, with
# matches wrapped in "(" and ")", against $res. Cases 1-3 use ASCII
# substrings, cases 4-6 use Thai substrings, and cases 7-9 repeat the ASCII
# queries in mixed case while expecting the same highlighted text as the
# lowercase versions.
25foreach {tn s res} {
26  1 abc           "(abc)defghijklm"
27  2 defgh         "abc(defgh)ijklm"
28  3 abcdefghijklm "(abcdefghijklm)"
29  4 กรุ            "(กรุ)งเทพมหานคร"
30  5 งเทพมห        "กรุ(งเทพมห)านคร"
31  6 กรุงเทพมหานคร  "(กรุงเทพมหานคร)"
32  7 Abc           "(abc)defghijklm"
33  8 deFgh         "abc(defgh)ijklm"
34  9 aBcdefGhijKlm "(abcdefghijklm)"
35} {
36  do_execsql_test 1.1.$tn {
37    SELECT highlight(t1, 0, '(', ')') FROM t1($s)
38  } $res
39}
40
# Test 1.2.0: fts5_expr() is given the upper-case text 'ABCD' together with
# tokenize=trigram; the expected rendering is the two lower-case trigrams
# "abc" and "bcd" joined with "+".
41do_execsql_test 1.2.0 {
42  SELECT fts5_expr('ABCD', 'tokenize=trigram')
43} {{"abc" + "bcd"}}
44
# Test 1.2.1: a LIKE query with an ESCAPE clause whose ? parameter is given
# no value here. No expected rows are listed for it.
45do_execsql_test 1.2.1 {
46  SELECT * FROM t1 WHERE y LIKE ? ESCAPE 'a'
47}
48
# Tests 1.3.*: each (tn, like, res) triple runs "y LIKE $like" against t1 and
# expects the rowid list $res; an empty list means no rows are expected.
# The patterns cover an infix match, a prefix pattern that should not match,
# a single-character infix, the "_" wildcard, a prefix%suffix pattern, an
# upper-case prefix, and a Thai infix.
# NOTE(review): rowid 1 is presumably the ASCII row and rowid 2 the Thai row,
# following the insertion order in test 1.0 - confirm.
49foreach {tn like res} {
50  1 {%cDef%}   1
51  2 {cDef%}    {}
52  3 {%f%}      1
53  4 {%f_h%}    1
54  5 {%f_g%}    {}
55  6 {abc%klm}  1
56  7 {ABCDEFG%} 1
57  8 {%รุงเ%}    2
58} {
59  do_execsql_test 1.3.$tn {
60    SELECT rowid FROM t1 WHERE y LIKE $like
61  } $res
62}
63
64#-------------------------------------------------------------------------
# Section 2: the same two rows as section 1, but the tokenizer is created
# with the "case_sensitive 1" option. The database is reset first, so this
# t1 is a fresh table.
65reset_db
66do_execsql_test 2.0 {
67  CREATE VIRTUAL TABLE t1 USING fts5(y, tokenize="trigram case_sensitive 1");
68  INSERT INTO t1 VALUES('abcdefghijklm');
69  INSERT INTO t1 VALUES('กรุงเทพมหานคร');
70}
71
# Tests 2.1.*: same query list as tests 1.1.*. Cases 1-6 (queries spelled
# exactly as stored) expect the same highlighted output as before; cases 7-9
# (mixed-case ASCII queries) now expect an empty result.
72foreach {tn s res} {
73  1 abc           "(abc)defghijklm"
74  2 defgh         "abc(defgh)ijklm"
75  3 abcdefghijklm "(abcdefghijklm)"
76  4 กรุ            "(กรุ)งเทพมหานคร"
77  5 งเทพมห        "กรุ(งเทพมห)านคร"
78  6 กรุงเทพมหานคร  "(กรุงเทพมหานคร)"
79  7 Abc           ""
80  8 deFgh         ""
81  9 aBcdefGhijKlm ""
82} {
83  do_execsql_test 2.1.$tn {
84    SELECT highlight(t1, 0, '(', ')') FROM t1($s)
85  } $res
86}
# Tests 2.2.*: the LIKE pattern list from tests 1.3.*, run against the
# case-sensitive t1 created in test 2.0. The expected rowids are unchanged,
# including for the mixed-case and upper-case patterns (cases 1 and 7).
87foreach {tn like res} {
88  1 {%cDef%}   1
89  2 {cDef%}    {}
90  3 {%f%}      1
91  4 {%f_h%}    1
92  5 {%f_g%}    {}
93  6 {abc%klm}  1
94  7 {ABCDEFG%} 1
95  8 {%รุงเ%}    2
96} {
97  do_execsql_test 2.2.$tn {
98    SELECT rowid FROM t1 WHERE y LIKE $like
99  } $res
100}
# Tests 2.3.*: GLOB patterns against the same table. Cases 1-8 mirror the
# LIKE list using "*" and "?" wildcards with lower-case text. Cases 9-12 use
# bracket character classes: a one-character class, a class whose first
# member is "]", and two "^"-negated classes (one that excludes "d" and is
# expected to match nothing, one that excludes only other characters and is
# expected to match row 1).
101foreach {tn like res} {
102  1 {*cdef*}     1
103  2 {cdef*}      {}
104  3 {*f*}        1
105  4 {*f?h*}      1
106  5 {*f?g*}      {}
107  6 {abc*klm}    1
108  7 {abcdefg*}   1
109  8 {*รุงเ*}      2
110  9 {abc[d]efg*} 1
111 10 {abc[]d]efg*} 1
112 11 {abc[^]d]efg*} {}
113 12 {abc[^]XYZ]efg*} 1
114} {
115  do_execsql_test 2.3.$tn {
116    SELECT rowid FROM t1 WHERE y GLOB $like
117  } $res
118}
119
# Test 2.3.null.1: LIKE with a literal NULL pattern; no expected rows are
# listed.
120do_execsql_test 2.3.null.1 {
121  SELECT rowid FROM t1 WHERE y LIKE NULL
122}
123
124#-------------------------------------------------------------------------
# Section 3: argument checking for the case_sensitive option.
#   3.1, 3.2 - the values 2 and 11 are expected to fail table creation with
#              the message "error in tokenizer constructor".
#   3.3      - the whole "tokenize=..." option is passed as one double-quoted
#              argument with case_sensitive 1; it is expected to succeed and
#              return nothing.
125reset_db
126do_catchsql_test 3.1 {
127  CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 2");
128} {1 {error in tokenizer constructor}}
129do_catchsql_test 3.2 {
130  CREATE VIRTUAL TABLE ttt USING fts5(c, tokenize="trigram case_sensitive 11");
131} {1 {error in tokenizer constructor}}
132do_catchsql_test 3.3 {
133  CREATE VIRTUAL TABLE ttt USING fts5(c, "tokenize=trigram case_sensitive 1");
134} {0 {}}
135
136#-------------------------------------------------------------------------
# Section 4: insert a three-byte blob (0x00 0x0b 0x01) into a trigram table
# and then run the fts5 'integrity-check' command on it. None of the three
# steps lists expected output.
# NOTE(review): presumably a regression test for non-text / embedded-NUL
# input to the tokenizer - confirm against the originating bug report.
137reset_db
138do_execsql_test 4.0 {
139  CREATE VIRTUAL TABLE t0 USING fts5(b, tokenize = "trigram");
140}
141do_execsql_test 4.1 {
142  INSERT INTO t0 VALUES (x'000b01');
143}
144do_execsql_test 4.2 {
145  INSERT INTO t0(t0) VALUES('integrity-check');
146}
147
148#-------------------------------------------------------------------------
# Section 5: rerun the LIKE pattern list from tests 1.3.* for both
# case_sensitive settings (ci = 0 and 1), with the table created using
# detail=%DETAIL%. The database is reset at the start of each ci iteration.
#
# The CREATE statement in test 5.cs=$ci.0.1 is written in double quotes
# (with escaped inner quotes) rather than braces so that Tcl substitutes $ci
# into the SQL text. The expected rowids are the same for both ci values.
#
# NOTE(review): foreach_detail_mode is not defined in this file; presumably
# it comes from fts5_common.tcl and runs the body once per fts5 detail mode,
# replacing the %DETAIL% placeholder - confirm.
149reset_db
150foreach_detail_mode $::testprefix {
151  foreach {ci} {0 1} {
152    reset_db
153    do_execsql_test 5.cs=$ci.0.1 "
154      CREATE VIRTUAL TABLE t1 USING fts5(
155          y, tokenize=\"trigram case_sensitive $ci\", detail=%DETAIL%
156      );
157    "
158    do_execsql_test 5.cs=$ci.0.2 {
159      INSERT INTO t1 VALUES('abcdefghijklm');
160      INSERT INTO t1 VALUES('กรุงเทพมหานคร');
161    }
162
163    foreach {tn like res} {
164      1 {%cDef%}   1
165      2 {cDef%}    {}
166      3 {%f%}      1
167      4 {%f_h%}    1
168      5 {%f_g%}    {}
169      6 {abc%klm}  1
170      7 {ABCDEFG%} 1
171      8 {%รุงเ%}    2
172    } {
173      do_execsql_test 5.cs=$ci.1.$tn {
174        SELECT rowid FROM t1 WHERE y LIKE $like
175      } $res
176    }
177  }
178}
179
# Section 6: query-plan checks. Test 6.0 creates two tables: ci0 with the
# default trigram tokenizer and ci1 with "case_sensitive 1". There is no
# reset_db before this section, so the tables are added to whatever database
# the last section-5 iteration left open.
180do_execsql_test 6.0 {
181  CREATE VIRTUAL TABLE ci0 USING fts5(x, tokenize="trigram");
182  CREATE VIRTUAL TABLE ci1 USING fts5(x, tokenize="trigram case_sensitive 1");
183}
184
185# LIKE and GLOB both work with case-insensitive tokenizers. Only GLOB works
186# with case-sensitive.
# Expected plan text per test:
#   6.1 (ci0, LIKE) - index string "0:L0"
#   6.2 (ci0, GLOB) - index string "0:G0"
#   6.3 (ci1, LIKE) - index string "0:" with nothing after the colon; this
#                     expectation also spells out the leading "SCAN ci1"
#   6.4 (ci1, GLOB) - index string "0:G0"
187do_eqp_test 6.1 {
188  SELECT * FROM ci0 WHERE x LIKE ?
189} {VIRTUAL TABLE INDEX 0:L0}
190do_eqp_test 6.2 {
191  SELECT * FROM ci0 WHERE x GLOB ?
192} {VIRTUAL TABLE INDEX 0:G0}
193do_eqp_test 6.3 {
194  SELECT * FROM ci1 WHERE x LIKE ?
195} {{SCAN ci1 VIRTUAL TABLE INDEX 0:}}
196do_eqp_test 6.4 {
197  SELECT * FROM ci1 WHERE x GLOB ?
198} {VIRTUAL TABLE INDEX 0:G0}
199
# End of the test script.
200finish_test
201