xref: /sqlite-3.40.0/ext/fts5/test/fts5hash.test (revision f2e151ae)
1# 2015 April 21
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11#
12# The tests in this file are focused on the code in fts5_hash.c.
13#
14
# Load fts5_common.tcl from the directory that contains this script.
source [file join [file dirname [info script]] fts5_common.tcl]
# NOTE(review): presumably used by the test harness to prefix test names;
# it is also passed to foreach_detail_mode below.
set testprefix fts5hash

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}
23
#-------------------------------------------------------------------------
# Return a list of tokens (a vocabulary) that all share the same hash
# key value. This can be used to test hash collisions.
#
# Options are passed as "-name value" pairs:
#
#   -nslot   Slot count passed to [sqlite3_fts5_token_hash] (default 1024).
#   -nword   Number of tokens to return (default 20).
#   -hash    Hash value every returned token must map to (default 88).
#   -prefix  String prepended to each random token (default "").
#
# Raises "bad args" if the option list has an odd number of elements and
# "bad option: NAME" for an unrecognised option. Tokens are generated at
# random and are not de-duplicated, so the result may contain repeats.
#
proc build_vocab1 {args} {

  # Defaults; the keys of this array also define the set of legal options.
  set O(-nslot) 1024
  set O(-nword)   20
  set O(-hash)    88
  set O(-prefix)  ""

  if {[llength $args] % 2} { error "bad args" }
  foreach {k v} $args {
    if {![info exists O($k)]} { error "bad option: $k" }
    set O($k) $v
  }

  # Keep drawing random tokens until enough of them land on the target
  # hash value.
  # NOTE(review): this loop never terminates if -hash is a value that
  # sqlite3_fts5_token_hash cannot return for the given -nslot - confirm
  # that callers only pass reachable values.
  set L [list]
  while {[llength $L] < $O(-nword)} {
    set t "$O(-prefix)[random_token]"
    set h [sqlite3_fts5_token_hash $O(-nslot) $t]
    if {$O(-hash)==$h} { lappend L $t }
  }
  return $L
}
50
# Return a random token made up of the letters a-j. A random integer in
# the range 0..1999999 is generated and each of its decimal digits is
# mapped to a letter (0->a, 1->b, ... 9->j), so a token is between one
# and seven characters long.
proc random_token {} {
  set map [list 0 a  1 b  2 c  3 d  4 e  5 f  6 g  7 h  8 i  9 j]
  # Braced so the expression is substituted only once and can be
  # byte-compiled, matching the style used by random_doc.
  set iVal [expr {int(rand() * 2000000)}]
  return [string map $map $iVal]
}
56
# Build a document of nWord words, each one picked at random (with
# replacement) from the list vocab, and return it as a Tcl list.
proc random_doc {vocab nWord} {
  set vocabSize [llength $vocab]
  set words ""
  for {set n 0} {$n<$nWord} {incr n} {
    # Pick a random index in the range 0..vocabSize-1.
    lappend words [lindex $vocab [expr {int(rand() * $vocabSize)}]]
  }
  return $words
}
66
# NOTE(review): foreach_detail_mode is defined outside this file;
# presumably it runs this script once per FTS5 detail mode, filling in
# the detail= placeholder used in the CREATE VIRTUAL TABLE statements.
foreach_detail_mode $testprefix {

  # Default vocabulary: 20 random tokens that all hash to value 88 when
  # 1024 slots are used.
  set vocab [build_vocab1]
  # Expose the Tcl proc random_doc to SQL as the function r().
  db func r random_doc

  # Fill table eee with 100 rows built from the colliding vocabulary and
  # run the integrity-check both inside and after the transaction.
  do_execsql_test 1.0 {
    CREATE VIRTUAL TABLE eee USING fts5(e, ee, detail=%DETAIL%);
    BEGIN;
      WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<100)
      INSERT INTO eee SELECT r($vocab, 5), r($vocab, 7) FROM ii;
      INSERT INTO eee(eee) VALUES('integrity-check');
    COMMIT;
    INSERT INTO eee(eee) VALUES('integrity-check');
  }

  # Second vocabulary: tokens that start with "xyz" and share a hash value
  # with the token "xyz" itself, plus "xyz".
  set hash [sqlite3_fts5_token_hash 1024 xyz]
  set vocab [build_vocab1 -prefix xyz -hash $hash]
  lappend vocab xyz

  # Create an fts5vocab table over eee and open a transaction. The
  # transaction stays open until the COMMIT in test 1.4.
  do_execsql_test 1.1 {
    CREATE VIRTUAL TABLE vocab USING fts5vocab(eee, 'row');
    BEGIN;
  }
  do_test 1.2 {
    # Insert 100 more rows, one statement at a time, inside the open
    # transaction.
    for {set i 1} {$i <= 100} {incr i} {
      execsql { INSERT INTO eee VALUES( r($vocab, 5), r($vocab, 7) ) }
    }
  } {}

  do_test 1.3 {
    # For every term listed by the fts5vocab table, the "doc" column must
    # equal the number of rows of eee that MATCH the term.
    db eval { SELECT term, doc FROM vocab } {
      set nRow [db one {SELECT count(*) FROM eee WHERE eee MATCH $term}]
      if {$nRow != $doc} {
        error "term=$term fts5vocab=$doc cnt=$nRow"
      }
    }
    # Make the script evaluate to an empty string, the expected result.
    set {} {}
  } {}

  # Close the transaction opened in test 1.1 and re-check integrity.
  do_execsql_test 1.4 {
    COMMIT;
    INSERT INTO eee(eee) VALUES('integrity-check');
  }

  #-----------------------------------------------------------------------
  # Add a small and very large token with the same hash value to an
  # empty table. At one point this would provoke an asan error.
  #
  do_test 1.5 {
    # big is a 200-character token; loop until a random token with the
    # same hash value (1024 slots) is found.
    set big [string repeat 12345 40]
    set hash [sqlite3_fts5_token_hash 1024 $big]
    while {1} {
      set small [random_token]
      if {[sqlite3_fts5_token_hash 1024 $small]==$hash} break
    }

    execsql { CREATE VIRTUAL TABLE t2 USING fts5(x, detail=%DETAIL%) }
    execsql {
      INSERT INTO t2 VALUES($small || ' ' || $big);
    }
  } {}

} ;# foreach_detail_mode
130
#-------------------------------------------------------------------------
# Check how many rows appear in t1_data while rows are inserted inside a
# single transaction with 'hashsize' set to 1024.
# NOTE(review): presumably 'hashsize' limits the in-memory pending-terms
# hash table, so that exceeding it flushes data to t1_data before the
# transaction commits - confirm against the FTS5 sources.
#
reset_db
do_execsql_test 2.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('hashsize', 1024);
  INSERT INTO t1(t1, rank) VALUES('automerge', 0);
  INSERT INTO t1(t1, rank) VALUES('crisismerge', 1000);
}

# After one insert inside an open transaction, t1_data holds just 2 rows.
# The transaction opened here is committed in test 2.4.
do_execsql_test 2.2 {
  BEGIN;
    INSERT INTO t1 VALUES('abc def ghi');
    SELECT count(*) FROM t1_data;
} {2}

# After 1024 more identical rows, still inside the same transaction,
# t1_data must have grown past 10 rows.
do_execsql_test 2.3 {
  WITH s(i) AS (
    SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1024
  )
  INSERT INTO t1 SELECT 'abc def ghi' FROM s;
  SELECT (SELECT count(*) FROM t1_data) > 10;
} {1}

# Commit, recreate the table with the same settings and insert 1024 rows
# whose three terms are distinct per row; expect more than 100 rows in
# t1_data.
do_execsql_test 2.4 {
  COMMIT;
  DROP TABLE t1;
  CREATE VIRTUAL TABLE t1 USING fts5(x);
  INSERT INTO t1(t1, rank) VALUES('hashsize', 1024);
  INSERT INTO t1(t1, rank) VALUES('automerge', 0);
  INSERT INTO t1(t1, rank) VALUES('crisismerge', 1000);
  WITH s(i) AS (
    SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1024
  )
  INSERT INTO t1 SELECT 'abc' || i || ' def' || i || ' ghi' || i FROM s;
  SELECT (SELECT count(*) FROM t1_data) > 100;
} {1}

finish_test
169