xref: /sqlite-3.40.0/test/fts3prefix.test (revision 4b0000c9)
109643ab7Sdan# 2011 May 04
209643ab7Sdan#
309643ab7Sdan# The author disclaims copyright to this source code.  In place of
409643ab7Sdan# a legal notice, here is a blessing:
509643ab7Sdan#
609643ab7Sdan#    May you do good and not evil.
709643ab7Sdan#    May you find forgiveness for yourself and forgive others.
809643ab7Sdan#    May you share freely, never taking more than you give.
909643ab7Sdan#
1009643ab7Sdan#*************************************************************************
1109643ab7Sdan# This file implements regression tests for SQLite library.  The
1209643ab7Sdan# focus of this script is testing the FTS3 module.
1309643ab7Sdan#
1409643ab7Sdan
1509643ab7Sdanset testdir [file dirname $argv0]
1609643ab7Sdansource $testdir/tester.tcl
1709643ab7Sdanset testprefix fts3prefix
1809643ab7Sdan
199d1f874aSdanifcapable !fts3 {
209d1f874aSdan  finish_test
219d1f874aSdan  return
229d1f874aSdan}
239d1f874aSdan
2409643ab7Sdan# This proc tests that the prefixes index appears to represent the same content
2509643ab7Sdan# as the terms index.
2609643ab7Sdan#
279d1f874aSdanproc fts3_terms_and_prefixes {db tbl prefixlengths} {
289d1f874aSdan
299d1f874aSdan  set iIndex 0
309d1f874aSdan  foreach len $prefixlengths {
319d1f874aSdan    incr iIndex
329d1f874aSdan    $db eval {
339d1f874aSdan      DROP TABLE IF EXISTS fts3check1;
349d1f874aSdan      DROP TABLE IF EXISTS fts3check2;
359d1f874aSdan    }
369d1f874aSdan    $db eval "CREATE VIRTUAL TABLE fts3check1 USING fts4term($tbl, 0);"
379d1f874aSdan    $db eval "CREATE VIRTUAL TABLE fts3check2 USING fts4term($tbl, $iIndex);"
3809643ab7Sdan
3909643ab7Sdan    $db eval {
409d1f874aSdan      DROP TABLE IF EXISTS temp.terms;
419d1f874aSdan      DROP TABLE IF EXISTS temp.prefixes;
4209643ab7Sdan      CREATE TEMP TABLE terms AS SELECT * FROM fts3check1;
4309643ab7Sdan      CREATE TEMP TABLE prefixes AS SELECT * FROM fts3check2;
4409643ab7Sdan      CREATE INDEX temp.idx ON prefixes(term);
4509643ab7Sdan      DROP TABLE fts3check1;
4609643ab7Sdan      DROP TABLE fts3check2;
4709643ab7Sdan    }
4809643ab7Sdan
499d1f874aSdan    set nExpect 0
5009643ab7Sdan    $db eval { SELECT term, docid, col, pos FROM temp.terms } a {
519d1f874aSdan      if {[string length $a(term)]<$len} continue
529d1f874aSdan      incr nExpect
539d1f874aSdan      set prefix [string range $a(term) 0 [expr $len-1]]
5409643ab7Sdan      set r [$db one {
5509643ab7Sdan        SELECT count(*) FROM temp.prefixes WHERE
569d1f874aSdan        term = $prefix AND docid = $a(docid) AND col = $a(col) AND pos = $a(pos)
5709643ab7Sdan      }]
5809643ab7Sdan      if {$r != 1} {
5909643ab7Sdan        error "$t, $a(docid), $a(col), $a(pos)"
6009643ab7Sdan      }
6109643ab7Sdan    }
629d1f874aSdan
639d1f874aSdan    set nCount [$db one {SELECT count(*) FROM temp.prefixes}]
649d1f874aSdan    if {$nCount != $nExpect} {
659d1f874aSdan      error "prefixes.count(*) is $nCount expected $nExpect"
6609643ab7Sdan    }
6709643ab7Sdan
6809643ab7Sdan    execsql { DROP TABLE temp.prefixes }
6909643ab7Sdan    execsql { DROP TABLE temp.terms }
7009643ab7Sdan
719d1f874aSdan    set list [list]
729d1f874aSdan    $db eval "
739d1f874aSdan      SELECT sum( 1 << (16*(level%1024)) ) AS total, (level/1024) AS tree
749d1f874aSdan      FROM ${tbl}_segdir GROUP BY tree
759d1f874aSdan    " {
769d1f874aSdan      lappend list [list $total $tree]
779d1f874aSdan    }
7809643ab7Sdan
799d1f874aSdan    if { [lsort -integer -index 0 $list] != [lsort -integer -index 1 $list] } {
809d1f874aSdan      error "inconsistent tree structures: $list"
819d1f874aSdan    }
8209643ab7Sdan  }
8309643ab7Sdan
8409643ab7Sdan  return ""
8509643ab7Sdan}
869d1f874aSdanproc fts3_tap_test {tn db tbl lens} {
879d1f874aSdan  uplevel [list do_test $tn [list fts3_terms_and_prefixes $db $tbl $lens] ""]
8809643ab7Sdan}
8909643ab7Sdan
9009643ab7Sdan#-------------------------------------------------------------------------
9109643ab7Sdan# Test cases 1.* are a sanity check. They test that the prefixes index is
9209643ab7Sdan# being constructed correctly for the simplest possible case.
9309643ab7Sdan#
9409643ab7Sdando_execsql_test 1.1 {
959d1f874aSdan  CREATE VIRTUAL TABLE t1 USING fts4(prefix='1,3,6');
969d1f874aSdan
979d1f874aSdan  CREATE VIRTUAL TABLE p1 USING fts4term(t1, 1);
989d1f874aSdan  CREATE VIRTUAL TABLE p2 USING fts4term(t1, 2);
999d1f874aSdan  CREATE VIRTUAL TABLE p3 USING fts4term(t1, 3);
10009643ab7Sdan  CREATE VIRTUAL TABLE terms USING fts4term(t1);
10109643ab7Sdan}
10209643ab7Sdando_execsql_test 1.2 {
10309643ab7Sdan  INSERT INTO t1 VALUES('sqlite mysql firebird');
10409643ab7Sdan}
1059d1f874aSdando_execsql_test 1.3.1 { SELECT term FROM p1 } {f m s}
1069d1f874aSdando_execsql_test 1.3.2 { SELECT term FROM p2 } {fir mys sql}
1079d1f874aSdando_execsql_test 1.3.3 { SELECT term FROM p3 } {firebi sqlite}
10809643ab7Sdando_execsql_test 1.4 {
10909643ab7Sdan  SELECT term FROM terms;
11009643ab7Sdan} {firebird mysql sqlite}
11109643ab7Sdan
1129d1f874aSdanfts3_tap_test 1.5 db t1 {1 3 6}
11309643ab7Sdan
11409643ab7Sdan#-------------------------------------------------------------------------
11509643ab7Sdan# A slightly more complicated dataset. This test also verifies that DELETE
11609643ab7Sdan# operations do not corrupt the prefixes index.
11709643ab7Sdan#
11809643ab7Sdando_execsql_test 2.1 {
11909643ab7Sdan  INSERT INTO t1 VALUES('FTS3 and FTS4 are an SQLite virtual table modules');
12009643ab7Sdan  INSERT INTO t1 VALUES('that allows users to perform full-text searches on');
12109643ab7Sdan  INSERT INTO t1 VALUES('a set of documents. The most common (and');
12209643ab7Sdan  INSERT INTO t1 VALUES('effective) way to describe full-text searches is');
12309643ab7Sdan  INSERT INTO t1 VALUES('"what Google, Yahoo and Altavista do with');
12409643ab7Sdan  INSERT INTO t1 VALUES('documents placed on the World Wide Web". Users');
12509643ab7Sdan  INSERT INTO t1 VALUES('input a term, or series of terms, perhaps');
12609643ab7Sdan  INSERT INTO t1 VALUES('connected by a binary operator or grouped together');
12709643ab7Sdan  INSERT INTO t1 VALUES('into a phrase, and the full-text query system');
12809643ab7Sdan  INSERT INTO t1 VALUES('finds the set of documents that best matches those');
12909643ab7Sdan  INSERT INTO t1 VALUES('terms considering the operators and groupings the');
13009643ab7Sdan  INSERT INTO t1 VALUES('user has specified. This article describes the');
13109643ab7Sdan  INSERT INTO t1 VALUES('deployment and usage of FTS3 and FTS4.');
13209643ab7Sdan  INSERT INTO t1 VALUES('FTS1 and FTS2 are obsolete full-text search');
13309643ab7Sdan  INSERT INTO t1 VALUES('modules for SQLite. There are known issues with');
13409643ab7Sdan  INSERT INTO t1 VALUES('these older modules and their use should be');
13509643ab7Sdan  INSERT INTO t1 VALUES('avoided. Portions of the original FTS3 code were');
13609643ab7Sdan  INSERT INTO t1 VALUES('contributed to the SQLite project by Scott Hess of');
13709643ab7Sdan  INSERT INTO t1 VALUES('Google. It is now developed and maintained as part');
13809643ab7Sdan  INSERT INTO t1 VALUES('of SQLite. ');
13909643ab7Sdan}
1409d1f874aSdanfts3_tap_test 2.2 db t1 {1 3 6}
14109643ab7Sdando_execsql_test 2.3 { DELETE FROM t1 WHERE docid%2; }
1429d1f874aSdanfts3_tap_test 2.4 db t1 {1 3 6}
14309643ab7Sdan
14409643ab7Sdando_execsql_test 2.5 { INSERT INTO t1(t1) VALUES('optimize') }
1459d1f874aSdanfts3_tap_test 2.6 db t1 {1 3 6}
14609643ab7Sdan
14709643ab7Sdando_execsql_test 3.1 {
1489d1f874aSdan  CREATE VIRTUAL TABLE t2 USING fts4(prefix='1,2,3');
14909643ab7Sdan  INSERT INTO t2 VALUES('On 12 September the wind direction turned and');
15009643ab7Sdan  INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the');
15109643ab7Sdan  INSERT INTO t2 VALUES('fleet was forced to take shelter at');
15209643ab7Sdan  INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind');
15309643ab7Sdan  INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet');
15409643ab7Sdan  INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey');
15509643ab7Sdan  INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved');
15609643ab7Sdan  INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he');
15709643ab7Sdan  INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of');
15809643ab7Sdan  INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland');
15909643ab7Sdan  INSERT INTO t2 VALUES('and waited for Harold''s return from the north.');
16009643ab7Sdan  INSERT INTO t2 VALUES('On 12 September the wind direction turned and');
16109643ab7Sdan  INSERT INTO t2 VALUES('William''s fleet sailed. A storm blew up and the');
16209643ab7Sdan  INSERT INTO t2 VALUES('fleet was forced to take shelter at');
16309643ab7Sdan  INSERT INTO t2 VALUES('Saint-Valery-sur-Somme and again wait for the wind');
16409643ab7Sdan  INSERT INTO t2 VALUES('to change. On 27 September the Norman fleet');
16509643ab7Sdan  INSERT INTO t2 VALUES('finally set sail, landing in England at Pevensey');
16609643ab7Sdan  INSERT INTO t2 VALUES('Bay (Sussex) on 28 September. William then moved');
16709643ab7Sdan  INSERT INTO t2 VALUES('to Hastings, a few miles to the east, where he');
16809643ab7Sdan  INSERT INTO t2 VALUES('built a prefabricated wooden castle for a base of');
16909643ab7Sdan  INSERT INTO t2 VALUES('operations. From there, he ravaged the hinterland');
17009643ab7Sdan  INSERT INTO t2 VALUES('and waited for Harold''s return from the north.');
17109643ab7Sdan}
17209643ab7Sdan
1739d1f874aSdanfts3_tap_test 3.2 db t2 {1 2 3}
17409643ab7Sdando_execsql_test 3.3 { SELECT optimize(t2) FROM t2 LIMIT 1 } {{Index optimized}}
1759d1f874aSdanfts3_tap_test 3.4 db t2 {1 2 3}
17609643ab7Sdan
17709643ab7Sdan
17809643ab7Sdan#-------------------------------------------------------------------------
17909643ab7Sdan# Simple tests for reading the prefix-index.
18009643ab7Sdan#
18109643ab7Sdando_execsql_test 4.1 {
1829d1f874aSdan  CREATE VIRTUAL TABLE t3 USING fts4(prefix="1,4");
18309643ab7Sdan  INSERT INTO t3 VALUES('one two three');
18409643ab7Sdan  INSERT INTO t3 VALUES('four five six');
18509643ab7Sdan  INSERT INTO t3 VALUES('seven eight nine');
18609643ab7Sdan}
18709643ab7Sdando_execsql_test 4.2 {
18809643ab7Sdan  SELECT * FROM t3 WHERE t3 MATCH 'f*'
18909643ab7Sdan} {{four five six}}
19009643ab7Sdando_execsql_test 4.3 {
19109643ab7Sdan  SELECT * FROM t3 WHERE t3 MATCH 'four*'
19209643ab7Sdan} {{four five six}}
19309643ab7Sdando_execsql_test 4.4 {
19409643ab7Sdan  SELECT * FROM t3 WHERE t3 MATCH 's*'
19509643ab7Sdan} {{four five six} {seven eight nine}}
19698b08e71Sdando_execsql_test 4.5 {
19798b08e71Sdan  SELECT * FROM t3 WHERE t3 MATCH 'sev*'
19898b08e71Sdan} {{seven eight nine}}
19998b08e71Sdando_execsql_test 4.6 {
20098b08e71Sdan  SELECT * FROM t3 WHERE t3 MATCH 'one*'
20198b08e71Sdan} {{one two three}}
20209643ab7Sdan
20349fc3364Sdan#-------------------------------------------------------------------------
20449fc3364Sdan# Syntax tests.
20549fc3364Sdan#
20649fc3364Sdando_catchsql_test 5.1 {
20749fc3364Sdan  CREATE VIRTUAL TABLE t4 USING fts4(prefix="abc");
20849fc3364Sdan} {1 {error parsing prefix parameter: abc}}
20949fc3364Sdando_catchsql_test 5.2 {
21049fc3364Sdan  CREATE VIRTUAL TABLE t4 USING fts4(prefix="");
21149fc3364Sdan} {0 {}}
212e0417626Sdando_catchsql_test 5.3 {
213e0417626Sdan  CREATE VIRTUAL TABLE t5 USING fts4(prefix="-1");
214e0417626Sdan} {1 {error parsing prefix parameter: -1}}
215e0417626Sdan
216e0417626Sdan#-------------------------------------------------------------------------
217e0417626Sdan# Prefix indexes of size 0 are ignored. Demonstrate this by showing that
218e0417626Sdan# adding prefix=0 does not change the contents of the %_segdir table.
219e0417626Sdan#
220e0417626Sdanreset_db
221e0417626Sdando_execsql_test 6.1.1 {
222e0417626Sdan  CREATE VIRTUAL TABLE t1 USING fts4(prefix=0);
223e0417626Sdan  CREATE VIRTUAL TABLE t2 USING fts4;
224e0417626Sdan  INSERT INTO t1 VALUES('Twas Mulga Bill, from Eaglehawk, ');
225e0417626Sdan  INSERT INTO t2 VALUES('Twas Mulga Bill, from Eaglehawk, ');
226e0417626Sdan} {}
227e0417626Sdando_execsql_test 6.1.2 {
228e0417626Sdan  SELECT md5sum(quote(root)) FROM t1_segdir;
229e0417626Sdan} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
230e0417626Sdan
231e0417626Sdanreset_db
232e0417626Sdando_execsql_test 6.2.1 {
233e0417626Sdan  CREATE VIRTUAL TABLE t1 USING fts4(prefix="1,0,2");
234e0417626Sdan  CREATE VIRTUAL TABLE t2 USING fts4(prefix="1,2");
235e0417626Sdan  INSERT INTO t1 VALUES('that caught the cycling craze;');
236e0417626Sdan  INSERT INTO t2 VALUES('that caught the cycling craze;');
237e0417626Sdan} {}
238e0417626Sdando_execsql_test 6.2.2 {
239e0417626Sdan  SELECT md5sum(quote(root)) FROM t1_segdir;
240e0417626Sdan} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
241e0417626Sdan
242e0417626Sdanreset_db
243e0417626Sdando_execsql_test 6.3.1 {
244e0417626Sdan  CREATE VIRTUAL TABLE t1 USING fts4(prefix="1,3,2");
245e0417626Sdan  CREATE VIRTUAL TABLE t2 USING fts4(prefix="1,2");
246e0417626Sdan  INSERT INTO t1 VALUES('He turned away the good old horse');
247e0417626Sdan  INSERT INTO t2 VALUES('He turned away the good old horse');
248e0417626Sdan} {}
249e0417626Sdando_test 6.3.2 {
250e0417626Sdan  set one [db eval {SELECT md5sum(quote(root)) FROM t1_segdir}]
251e0417626Sdan  set two [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
252e0417626Sdan  expr {$one == $two}
253e0417626Sdan} 0
254e0417626Sdan
255e0417626Sdanreset_db
256e0417626Sdando_execsql_test 6.4.1 {
257e0417626Sdan  CREATE VIRTUAL TABLE t1 USING fts4(prefix="1,600,2");
258e0417626Sdan  CREATE VIRTUAL TABLE t2 USING fts4(prefix="1,2");
259e0417626Sdan  INSERT INTO t1 VALUES('that served him many days;');
260e0417626Sdan  INSERT INTO t2 VALUES('that served him many days;');
261e0417626Sdan} {}
262e0417626Sdando_execsql_test 6.4.2 {
263e0417626Sdan  SELECT md5sum(quote(root)) FROM t1_segdir;
264e0417626Sdan} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
265e0417626Sdan
266e0417626Sdanreset_db
267e0417626Sdando_execsql_test 6.5.1 {
268e0417626Sdan  CREATE VIRTUAL TABLE t1 USING fts4(prefix="2147483647,2147483648,2147483649");
269b7134d08Sdan  CREATE VIRTUAL TABLE t2 USING fts4(prefix=);
270e0417626Sdan  INSERT INTO t1 VALUES('He dressed himself in cycling clothes');
271e0417626Sdan  INSERT INTO t2 VALUES('He dressed himself in cycling clothes');
272e0417626Sdan} {}
273e0417626Sdando_execsql_test 6.5.2 {
274e0417626Sdan  SELECT md5sum(quote(root)) FROM t1_segdir;
275e0417626Sdan} [db eval {SELECT md5sum(quote(root)) FROM t2_segdir}]
27649fc3364Sdan
277*4b0000c9Sdan
278*4b0000c9Sdando_execsql_test 7.0 {
279*4b0000c9Sdan  CREATE VIRTUAL TABLE t6 USING fts4(x,order=DESC);
280*4b0000c9Sdan  INSERT INTO t6(docid, x) VALUES(-1,'a b');
281*4b0000c9Sdan  INSERT INTO t6(docid, x) VALUES(1, 'b');
282*4b0000c9Sdan}
283*4b0000c9Sdando_execsql_test 7.1 {
284*4b0000c9Sdan  SELECT docid FROM t6 WHERE t6 MATCH '"a* b"';
285*4b0000c9Sdan} {-1}
286*4b0000c9Sdando_execsql_test 7.2 {
287*4b0000c9Sdan  SELECT docid FROM t6 WHERE t6 MATCH 'a*';
288*4b0000c9Sdan} {-1}
289*4b0000c9Sdando_execsql_test 7.3 {
290*4b0000c9Sdan  SELECT docid FROM t6 WHERE t6 MATCH 'a* b';
291*4b0000c9Sdan} {-1}
292*4b0000c9Sdan
293*4b0000c9Sdan
294*4b0000c9Sdan
29509643ab7Sdanfinish_test
296