xref: /sqlite-3.40.0/test/fts3prefix.test (revision 4b0000c9)
1# 2011 May 04
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#*************************************************************************
11# This file implements regression tests for SQLite library.  The
12# focus of this script is testing the FTS3 module.
13#
14
15set testdir [file dirname $argv0]
16source $testdir/tester.tcl
17set testprefix fts3prefix
18
19ifcapable !fts3 {
20  finish_test
21  return
22}
23
24# This proc tests that the prefixes index appears to represent the same content
25# as the terms index.
26#
27proc fts3_terms_and_prefixes {db tbl prefixlengths} {
28
29  set iIndex 0
30  foreach len $prefixlengths {
31    incr iIndex
32    $db eval {
33      DROP TABLE IF EXISTS fts3check1;
34      DROP TABLE IF EXISTS fts3check2;
35    }
36    $db eval "CREATE VIRTUAL TABLE fts3check1 USING fts4term($tbl, 0);"
37    $db eval "CREATE VIRTUAL TABLE fts3check2 USING fts4term($tbl, $iIndex);"
38
39    $db eval {
40      DROP TABLE IF EXISTS temp.terms;
41      DROP TABLE IF EXISTS temp.prefixes;
42      CREATE TEMP TABLE terms AS SELECT * FROM fts3check1;
43      CREATE TEMP TABLE prefixes AS SELECT * FROM fts3check2;
44      CREATE INDEX temp.idx ON prefixes(term);
45      DROP TABLE fts3check1;
46      DROP TABLE fts3check2;
47    }
48
49    set nExpect 0
50    $db eval { SELECT term, docid, col, pos FROM temp.terms } a {
51      if {[string length $a(term)]<$len} continue
52      incr nExpect
53      set prefix [string range $a(term) 0 [expr $len-1]]
54      set r [$db one {
55        SELECT count(*) FROM temp.prefixes WHERE
56        term = $prefix AND docid = $a(docid) AND col = $a(col) AND pos = $a(pos)
57      }]
58      if {$r != 1} {
59        error "$t, $a(docid), $a(col), $a(pos)"
60      }
61    }
62
63    set nCount [$db one {SELECT count(*) FROM temp.prefixes}]
64    if {$nCount != $nExpect} {
65      error "prefixes.count(*) is $nCount expected $nExpect"
66    }
67
68    execsql { DROP TABLE temp.prefixes }
69    execsql { DROP TABLE temp.terms }
70
71    set list [list]
72    $db eval "
73      SELECT sum( 1 << (16*(level%1024)) ) AS total, (level/1024) AS tree
74      FROM ${tbl}_segdir GROUP BY tree
75    " {
76      lappend list [list $total $tree]
77    }
78
79    if { [lsort -integer -index 0 $list] != [lsort -integer -index 1 $list] } {
80      error "inconsistent tree structures: $list"
81    }
82  }
83
84  return ""
85}
86proc fts3_tap_test {tn db tbl lens} {
87  uplevel [list do_test $tn [list fts3_terms_and_prefixes $db $tbl $lens] ""]
88}
89
90#-------------------------------------------------------------------------
91# Test cases 1.* are a sanity check. They test that the prefixes index is
92# being constructed correctly for the simplest possible case.
93#
94do_execsql_test 1.1 {
95  CREATE VIRTUAL TABLE t1 USING fts4(prefix='1,3,6');
96
97  CREATE VIRTUAL TABLE p1 USING fts4term(t1, 1);
98  CREATE VIRTUAL TABLE p2 USING fts4term(t1, 2);
99  CREATE VIRTUAL TABLE p3 USING fts4term(t1, 3);
100  CREATE VIRTUAL TABLE terms USING fts4term(t1);
101}
102do_execsql_test 1.2 {
103  INSERT INTO t1 VALUES('sqlite mysql firebird');
104}
105do_execsql_test 1.3.1 { SELECT term FROM p1 } {f m s}
106do_execsql_test 1.3.2 { SELECT term FROM p2 } {fir mys sql}
107do_execsql_test 1.3.3 { SELECT term FROM p3 } {firebi sqlite}
108do_execsql_test 1.4 {
109  SELECT term FROM terms;
110} {firebird mysql sqlite}
111
112fts3_tap_test 1.5 db t1 {1 3 6}
113
114#-------------------------------------------------------------------------
115# A slightly more complicated dataset. This test also verifies that DELETE
116# operations do not corrupt the prefixes index.
117#
118do_execsql_test 2.1 {
119  INSERT INTO t1 VALUES('FTS3 and FTS4 are an SQLite virtual table modules');
120  INSERT INTO t1 VALUES('that allows users to perform full-text searches on');
121  INSERT INTO t1 VALUES('a set of documents. The most common (and');
122  INSERT INTO t1 VALUES('effective) way to describe full-text searches is');
123  INSERT INTO t1 VALUES('"what Google, Yahoo and Altavista do with');
124  INSERT INTO t1 VALUES('documents placed on the World Wide Web". Users');
125  INSERT INTO t1 VALUES('input a term, or series of terms, perhaps');
126  INSERT INTO t1 VALUES('connected by a binary operator or grouped together');
127  INSERT INTO t1 VALUES('into a phrase, and the full-text query system');
128  INSERT INTO t1 VALUES('finds the set of documents that best matches those');
129  INSERT INTO t1 VALUES('terms considering the operators and groupings the');
130  INSERT INTO t1 VALUES('user has specified. This article describes the');
131  INSERT INTO t1 VALUES('deployment and usage of FTS3 and FTS4.');
132  INSERT INTO t1 VALUES('FTS1 and FTS2 are obsolete full-text search');
133  INSERT INTO t1 VALUES('modules for SQLite. There are known issues with');
134  INSERT INTO t1 VALUES('these older modules and their use should be');
135  INSERT INTO t1 VALUES('avoided. Portions of the original FTS3 code were');
136  INSERT INTO t1 VALUES('contributed to the SQLite project by Scott Hess of');
137  INSERT INTO t1 VALUES('Google. It is now developed and maintained as part');
138  INSERT INTO t1 VALUES('of SQLite. ');
139}
140fts3_tap_test 2.2 db t1 {1 3 6}
141do_execsql_test 2.3 { DELETE FROM t1 WHERE docid%2; }
142fts3_tap_test 2.4 db t1 {1 3 6}
143
144do_execsql_test 2.5 { INSERT INTO t1(t1) VALUES('optimize') }
145fts3_tap_test 2.6 db t1 {1 3 6}
146
147do_execsql_test 3.1 {
148  CREATE VIRTUAL TABLE t2 USING fts4(prefix='1,2,3');
149  INSERT INTO t2 VALUES('On 12 September the wind direction turned and');
150  INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the');
151  INSERT INTO t2 VALUES('fleet was forced to take shelter at');
152  INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind');
153  INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet');
154  INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey');
155  INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved');
156  INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he');
157  INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of');
158  INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland');
159  INSERT INTO t2 VALUES('and waited for Harold''s return from the north.');
160  INSERT INTO t2 VALUES('On 12 September the wind direction turned and');
161  INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the');
162  INSERT INTO t2 VALUES('fleet was forced to take shelter at');
163  INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind');
164  INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet');
165  INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey');
166  INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved');
167  INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he');
168  INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of');
169  INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland');
170  INSERT INTO t2 VALUES('and waited for Harold''s return from the north.');
171}
172
173fts3_tap_test 3.2 db t2 {1 2 3}
174do_execsql_test 3.3 { SELECT optimize(t2) FROM t2 LIMIT 1 } {{Index optimized}}
175fts3_tap_test 3.4 db t2 {1 2 3}
176
177
178#-------------------------------------------------------------------------
179# Simple tests for reading the prefix-index.
180#
181do_execsql_test 4.1 {
182  CREATE VIRTUAL TABLE t3 USING fts4(prefix="1,4");
183  INSERT INTO t3 VALUES('one two three');
184  INSERT INTO t3 VALUES('four five six');
185  INSERT INTO t3 VALUES('seven eight nine');
186}
187do_execsql_test 4.2 {
188  SELECT * FROM t3 WHERE t3 MATCH 'f*'
189} {{four five six}}
190do_execsql_test 4.3 {
191  SELECT * FROM t3 WHERE t3 MATCH 'four*'
192} {{four five six}}
193do_execsql_test 4.4 {
194  SELECT * FROM t3 WHERE t3 MATCH 's*'
195} {{four five six} {seven eight nine}}
196do_execsql_test 4.5 {
197  SELECT * FROM t3 WHERE t3 MATCH 'sev*'
198} {{seven eight nine}}
199do_execsql_test 4.6 {
200  SELECT * FROM t3 WHERE t3 MATCH 'one*'
201} {{one two three}}
202
203#-------------------------------------------------------------------------
204# Syntax tests.
205#
206do_catchsql_test 5.1 {
207  CREATE VIRTUAL TABLE t4 USING fts4(prefix="abc");
208} {1 {error parsing prefix parameter: abc}}
209do_catchsql_test 5.2 {
210  CREATE VIRTUAL TABLE t4 USING fts4(prefix="");
211} {0 {}}
212do_catchsql_test 5.3 {
213  CREATE VIRTUAL TABLE t5 USING fts4(prefix="-1");
214} {1 {error parsing prefix parameter: -1}}
215
216#-------------------------------------------------------------------------
217# Prefix indexes of size 0 are ignored. Demonstrate this by showing that
218# adding prefix=0 does not change the contents of the %_segdir table.
219#
220reset_db
221do_execsql_test 6.1.1 {
222  CREATE VIRTUAL TABLE t1 USING fts4(prefix=0);
223  CREATE VIRTUAL TABLE t2 USING fts4;
224  INSERT INTO t1 VALUES('Twas Mulga Bill, from Eaglehawk, ');
225  INSERT INTO t2 VALUES('Twas Mulga Bill, from Eaglehawk, ');
226} {}
227do_execsql_test 6.1.2 {
228  SELECT md5sum(quote(root)) FROM t1_segdir;
229} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
230
231reset_db
232do_execsql_test 6.2.1 {
233  CREATE VIRTUAL TABLE t1 USING fts4(prefix="1,0,2");
234  CREATE VIRTUAL TABLE t2 USING fts4(prefix="1,2");
235  INSERT INTO t1 VALUES('that caught the cycling craze;');
236  INSERT INTO t2 VALUES('that caught the cycling craze;');
237} {}
238do_execsql_test 6.2.2 {
239  SELECT md5sum(quote(root)) FROM t1_segdir;
240} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
241
242reset_db
243do_execsql_test 6.3.1 {
244  CREATE VIRTUAL TABLE t1 USING fts4(prefix="1,3,2");
245  CREATE VIRTUAL TABLE t2 USING fts4(prefix="1,2");
246  INSERT INTO t1 VALUES('He turned away the good old horse');
247  INSERT INTO t2 VALUES('He turned away the good old horse');
248} {}
249do_test 6.3.2 {
250  set one [db eval {SELECT md5sum(quote(root)) FROM t1_segdir}]
251  set two [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
252  expr {$one == $two}
253} 0
254
255reset_db
256do_execsql_test 6.4.1 {
257  CREATE VIRTUAL TABLE t1 USING fts4(prefix="1,600,2");
258  CREATE VIRTUAL TABLE t2 USING fts4(prefix="1,2");
259  INSERT INTO t1 VALUES('that served him many days;');
260  INSERT INTO t2 VALUES('that served him many days;');
261} {}
262do_execsql_test 6.4.2 {
263  SELECT md5sum(quote(root)) FROM t1_segdir;
264} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
265
266reset_db
267do_execsql_test 6.5.1 {
268  CREATE VIRTUAL TABLE t1 USING fts4(prefix="2147483647,2147483648,2147483649");
269  CREATE VIRTUAL TABLE t2 USING fts4(prefix=);
270  INSERT INTO t1 VALUES('He dressed himself in cycling clothes');
271  INSERT INTO t2 VALUES('He dressed himself in cycling clothes');
272} {}
273do_execsql_test 6.5.2 {
274  SELECT md5sum(quote(root)) FROM t1_segdir;
275} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
276
277
278do_execsql_test 7.0 {
279  CREATE VIRTUAL TABLE t6 USING fts4(x,order=DESC);
280  INSERT INTO t6(docid, x) VALUES(-1,'a b');
281  INSERT INTO t6(docid, x) VALUES(1, 'b');
282}
283do_execsql_test 7.1 {
284  SELECT docid FROM t6 WHERE t6 MATCH '"a* b"';
285} {-1}
286do_execsql_test 7.2 {
287  SELECT docid FROM t6 WHERE t6 MATCH 'a*';
288} {-1}
289do_execsql_test 7.3 {
290  SELECT docid FROM t6 WHERE t6 MATCH 'a* b';
291} {-1}
292
293
294
295finish_test
296