# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on custom tokenizers that support synonyms.
#

source [file join [file dirname [info script]] fts5_common.tcl]
set testprefix fts5synonym

# If FTS5 is not available in this build (SQLITE_ENABLE_FTS5 is not
# defined), omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Callback handed to sqlite3_fts5_create_tokenizer by later sections of
# this file. It ignores its arguments and always returns the string
# "tcl_tokenize", which is the name used for the tokenizer procs that
# those sections define.
proc tcl_create {args} {
  return "tcl_tokenize"
}

foreach_detail_mode $testprefix {

#-------------------------------------------------------------------------
# Basic sanity test for the code in fts5_tcl.c: create a table using the
# "tclnum" tokenizer, insert two rows and check that each can be found
# with a simple one-term query.
#
fts5_tclnum_register db
do_execsql_test 1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
    x, tokenize = "tclnum document", detail=%DETAIL%
  );
  INSERT INTO ft VALUES('abc def ghi');
  INSERT INTO ft VALUES('jkl mno pqr');
  SELECT rowid, x FROM ft WHERE ft MATCH 'def';
  SELECT x, rowid FROM ft WHERE ft MATCH 'pqr';
} {1 {abc def ghi} {jkl mno pqr} 2}

#-------------------------------------------------------------------------
# Test a tokenizer that supports synonyms by adding extra entries to the
# FTS index.
#
reset_db
fts5_tclnum_register db

do_execsql_test 2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(
    x, tokenize = "tclnum document", detail=%DETAIL%
  );
  INSERT INTO ft VALUES('one two three');
  INSERT INTO ft VALUES('four five six');
  INSERT INTO ft VALUES('eight nine ten');
} {}

# Each entry is: test-number, query, expected rowids. A query is only run
# if fts5_expr_ok accepts it for table ft.
foreach {tn q res} {
  1 "3"                1
  2 "eight OR 8 OR 5"  {2 3}
  3 "10"               {}
  4 "1*"               {1}
  5 "1 + 2"            {1}
} {
  if {[fts5_expr_ok $q ft]} {
    do_execsql_test 2.1.$tn {
      SELECT rowid FROM ft WHERE ft MATCH $q
    } $res
  }
}

#-------------------------------------------------------------------------
# Test some broken tokenizers:
#
#   3.1.*: A tokenizer that declares the very first token to be colocated.
#
#   3.2.*: A tokenizer that reports two identical tokens at the same
#          position. This is allowed.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create

# Tokenizer for 3.1.0 and 3.1.1: the first token of each text is reported
# with the -colo flag, all subsequent tokens without it.
proc tcl_tokenize {tflags text} {
  set isFirst 1
  foreach {tok iFrom iTo} [fts5_tokenize_split $text] {
    if {$isFirst} {
      set isFirst 0
      sqlite3_fts5_token -colo $tok $iFrom $iTo
    } else {
      sqlite3_fts5_token $tok $iFrom $iTo
    }
  }
}
do_execsql_test 3.1.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one two three');
  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  SELECT * FROM vv;
} {
  one 1 1   three 1 1   two 1 1
}

do_execsql_test 3.1.1 {
  INSERT INTO ft(ft) VALUES('integrity-check');
} {}

# Replace the tokenizer with one that never uses -colo, then query the
# row that was indexed by the previous tokenizer.
proc tcl_tokenize {tflags text} {
  foreach {tok iFrom iTo} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $tok $iFrom $iTo
  }
}

do_execsql_test 3.1.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three'
} {1}

reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create

# Tokenizer for the 3.2.* tests: every token is reported twice, the second
# time with the -colo flag and the same text and offsets.
proc tcl_tokenize {tflags text} {
  foreach {tok iFrom iTo} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $tok $iFrom $iTo
    sqlite3_fts5_token -colo $tok $iFrom $iTo
  }
}
do_execsql_test 3.2.0 {
  CREATE VIRTUAL TABLE ft USING fts5(x, tokenize = tcl);
  INSERT INTO ft VALUES('one one two three');
  CREATE VIRTUAL TABLE vv USING fts5vocab(ft, row);
  SELECT * FROM vv;
} {
  one 1 4   three 1 2   two 1 2
}
do_execsql_test 3.2.1 {
  SELECT rowid FROM ft WHERE ft MATCH 'one';
} {1}
do_execsql_test 3.2.2 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two three';
} {1}
do_execsql_test 3.2.3 {
  SELECT rowid FROM ft WHERE ft MATCH 'one + one + two + three';
} {1}
do_execsql_test 3.2.4 {
  SELECT rowid FROM ft WHERE ft MATCH 'one two two three';
} {1}
do_execsql_test 3.2.5 {
  SELECT rowid FROM ft WHERE ft MATCH 'one + two + two + three';
} {}

#-------------------------------------------------------------------------
# Check that expressions with synonyms can be parsed and executed.
#
reset_db
fts5_tclnum_register db

# Each entry is: test-number, query text, expected output of fts5_expr()
# when the query is parsed using the tclnum tokenizer.
foreach {tn input res} {
  1  {abc}    {"abc"}
  2  {one}    {"one"|"i"|"1"}
  3  {3}      {"3"|"iii"|"three"}
  4  {3*}     {"3" *}
} {
  do_execsql_test 4.1.$tn {
    SELECT fts5_expr($input, 'tokenize=tclnum')
  } [list $res]
}

do_execsql_test 4.2.1 {
  CREATE VIRTUAL TABLE xx USING fts5(x, tokenize=tclnum, detail=%DETAIL%);
  INSERT INTO xx VALUES('one two');
  INSERT INTO xx VALUES('three four');
}

do_execsql_test 4.2.2 {
  SELECT rowid FROM xx WHERE xx MATCH '2'
} {1}

do_execsql_test 4.2.3 {
  SELECT rowid FROM xx WHERE xx MATCH '3'
} {2}

# Create and populate the two-column table t1 used by the 5.* tests.
do_test 5.0 {
  execsql {
    CREATE VIRTUAL TABLE t1 USING fts5(a, b, tokenize=tclnum, detail=%DETAIL%)
  }
  foreach {rowid a b} {
    1 {four v 4 i three}             {1 3 five five 4 one}
    2 {5 1 3 4 i}                    {2 2 v two 4}
    3 {5 i 5 2 four 4 1}             {iii ii five two 1}
    4 {ii four 4 one 5 three five}   {one 5 1 iii 4 3}
    5 {three i v i four 4 1}         {ii five five five iii}
    6 {4 2 ii two 2 iii}             {three 1 four 4 iv 1 iv}
    7 {ii ii two three 2 5}          {iii i ii iii iii one one}
    8 {2 ii i two 3 three 2}         {two iv v iii 3 five}
    9 {i 2 iv 3 five four v}         {iii 4 three i three ii 1}
  } {
    execsql { INSERT INTO t1(rowid, a, b) VALUES($rowid, $a, $b) }
  }
} {}


# Each entry is: test-number, query, expected result. The expected result
# is a flat list of rowid, highlighted column a, highlighted column b for
# every matching row. A query is only run if fts5_expr_ok accepts it for
# table t1.
foreach {tn q res} {
  1 {one} {
    1 {four v 4 [i] three}             {[1] 3 five five 4 [one]}
    2 {5 [1] 3 4 [i]}                  {2 2 v two 4}
    3 {5 [i] 5 2 four 4 [1]}           {iii ii five two [1]}
    4 {ii four 4 [one] 5 three five}   {[one] 5 [1] iii 4 3}
    5 {three [i] v [i] four 4 [1]}     {ii five five five iii}
    6 {4 2 ii two 2 iii}               {three [1] four 4 iv [1] iv}
    7 {ii ii two three 2 5}            {iii [i] ii iii iii [one] [one]}
    8 {2 ii [i] two 3 three 2}         {two iv v iii 3 five}
    9 {[i] 2 iv 3 five four v}         {iii 4 three [i] three ii [1]}
  }
  2 {five four} {
    1 {[four] [v] [4] i three}               {1 3 [five] [five] [4] one}
    2 {[5] 1 3 [4] i}                        {2 2 [v] two [4]}
    3 {[5] i [5] 2 [four] [4] 1}             {iii ii [five] two 1}
    4 {ii [four] [4] one [5] three [five]}   {one [5] 1 iii [4] 3}
    5 {three i [v] i [four] [4] 1}           {ii [five] [five] [five] iii}
    8 {2 ii i two 3 three 2}                 {two [iv] [v] iii 3 [five]}
    9 {i 2 [iv] 3 [five] [four] [v]}         {iii [4] three i three ii 1}
  }
  3 {one OR two OR iii OR 4 OR v} {
    1 {[four] [v] [4] [i] [three]}                 {[1] [3] [five] [five] [4] [one]}
    2 {[5] [1] [3] [4] [i]}                        {[2] [2] [v] [two] [4]}
    3 {[5] [i] [5] [2] [four] [4] [1]}             {[iii] [ii] [five] [two] [1]}
    4 {[ii] [four] [4] [one] [5] [three] [five]}   {[one] [5] [1] [iii] [4] [3]}
    5 {[three] [i] [v] [i] [four] [4] [1]}         {[ii] [five] [five] [five] [iii]}
    6 {[4] [2] [ii] [two] [2] [iii]}               {[three] [1] [four] [4] [iv] [1] [iv]}
    7 {[ii] [ii] [two] [three] [2] [5]}            {[iii] [i] [ii] [iii] [iii] [one] [one]}
    8 {[2] [ii] [i] [two] [3] [three] [2]}         {[two] [iv] [v] [iii] [3] [five]}
    9 {[i] [2] [iv] [3] [five] [four] [v]}         {[iii] [4] [three] [i] [three] [ii] [1]}
  }

  4 {5 + 1} {
    2 {[5 1] 3 4 i}                    {2 2 v two 4}
    3 {[5 i] 5 2 four 4 1}             {iii ii five two 1}
    4 {ii four 4 one 5 three five}     {one [5 1] iii 4 3}
    5 {three i [v i] four 4 1}         {ii five five five iii}
  }

  5 {one + two + three} {
    7 {ii ii two three 2 5}            {iii [i ii iii] iii one one}
    8 {2 ii [i two 3] three 2}         {two iv v iii 3 five}
  }

  6 {"v v"} {
    1 {four v 4 i three}               {1 3 [five five] 4 one}
    5 {three i v i four 4 1}           {ii [five five five] iii}
  }
} {
  if {[fts5_expr_ok $q t1]} {
    do_execsql_test 5.1.$tn {
      SELECT rowid, highlight(t1, 0, '[', ']'), highlight(t1, 1, '[', ']')
      FROM t1 WHERE t1 MATCH $q
    } $res
  }
}

# Test that the xQueryPhrase() API works with synonyms.
#
# SQL function mit(): decode a matchinfo blob into a Tcl list of 32-bit
# integers, reading them in the byte order of the host platform.
proc mit {blob} {
  array set fmt {littleEndian i* bigEndian I*}
  binary scan $blob $fmt($::tcl_platform(byteOrder)) ints
  return $ints
}
db func mit mit
sqlite3_fts5_register_matchinfo db

foreach {tn q res} {
  1 {one} {
    1 {1 11 7 2 12 6}   2 {2 11 7 0 12 6}
    3 {2 11 7 1 12 6}   4 {1 11 7 2 12 6}
    5 {3 11 7 0 12 6}   6 {0 11 7 2 12 6}
    7 {0 11 7 3 12 6}   8 {1 11 7 0 12 6}
    9 {1 11 7 2 12 6}
  }
} {
  do_execsql_test 5.2.$tn {
    SELECT rowid, mit(matchinfo(t1, 'x')) FROM t1 WHERE t1 MATCH $q
  } $res
}

#-------------------------------------------------------------------------
# Test terms with more than 4 synonyms.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create

# Tokenizer for the 6.* tests. Every token is reported as-is. In addition,
# when the tokenizer is invoked with flags "query" and the token is a
# single character, nine more tokens are reported with -colo at the same
# offsets: the character repeated 2, 3, ... 10 times.
proc tcl_tokenize {tflags text} {
  foreach {tok iFrom iTo} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $tok $iFrom $iTo
    if {$tflags!="query" || [string length $tok]!=1} continue
    for {set nRep 2} {$nRep<=10} {incr nRep} {
      sqlite3_fts5_token -colo [string repeat $tok $nRep] $iFrom $iTo
    }
  }
}

do_execsql_test 6.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(x, tokenize=tcl, detail=%DETAIL%);
  INSERT INTO t1 VALUES('yy xx qq');
  INSERT INTO t1 VALUES('yy xx xx');
}
if {[fts5_expr_ok "NEAR(y q)" t1]} {
  do_execsql_test 6.0.2 {
    SELECT * FROM t1 WHERE t1 MATCH 'NEAR(y q)';
  } {{yy xx qq}}
}

# Create and populate the two-column table t2 used by the 6.1.* and 6.2.*
# tests.
do_test 6.0.3 {
  execsql {
    CREATE VIRTUAL TABLE t2 USING fts5(a, b, tokenize=tcl, detail=%DETAIL%)
  }
  foreach {rowid a b} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee}       {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm}              {ffffff yy iiii rr s ccc qqqqq}
    3 {zzzz llll gggggg cccc uu}               {hhhhhh aaaa ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh}                    {aa yyy t x aaaaa ii}
    5 {fffff mm vvvv ooo ffffff kkkk tttt}     {cccccc bb e zzz d n}
    6 {iii dddd hh qqqq ddd ooo}               {ttt d c b aaaaaa qqqq}
    7 {jjjj rrrr v zzzzz u tt t}               {ppppp pp dddd mm hhh uuu}
    8 {gggg rrrrrr kkkk vvvv gggg jjjjjj b}    {dddddd jj r w cccc wwwwww ss}
    9 {kkkkk qqq oooo e tttttt mmm}            {e ss qqqqqq hhhh llllll gg}
  } {
    execsql { INSERT INTO t2(rowid, a, b) VALUES($rowid, $a, $b) }
  }
} {}

# Each entry is: test-number, query, expected result in ascending rowid
# order. Every query that fts5_expr_ok accepts for table t2 is run twice:
# once in the default order and once with ORDER BY rowid DESC.
foreach {tn q res} {
  1 {a} {
    1 {yyyy vvvvv qq oo yyyyyy vvvv eee}     {ffff uu r qq [aaaa]}
    3 {zzzz llll gggggg cccc uu}             {hhhhhh [aaaa] ppppp rr ee jjjj}
    4 {r f i rrrrrr ww hhh}                  {[aa] yyy t x [aaaaa] ii}
    6 {iii dddd hh qqqq ddd ooo}             {ttt d c b [aaaaaa] qqqq}
  }

  2 {a AND q} {
    1 {yyyy vvvvv [qq] oo yyyyyy vvvv eee}   {ffff uu r [qq] [aaaa]}
    6 {iii dddd hh [qqqq] ddd ooo}           {ttt d c b [aaaaaa] [qqqq]}
  }

  3 {o OR (q AND a)} {
    1 {yyyy vvvvv [qq] [oo] yyyyyy vvvv eee}     {ffff uu r [qq] [aaaa]}
    2 {ww [oooooo] bbbbb ssssss mm}              {ffffff yy iiii rr s ccc qqqqq}
    5 {fffff mm vvvv [ooo] ffffff kkkk tttt}     {cccccc bb e zzz d n}
    6 {iii dddd hh [qqqq] ddd [ooo]}             {ttt d c b [aaaaaa] [qqqq]}
    9 {kkkkk qqq [oooo] e tttttt mmm}            {e ss qqqqqq hhhh llllll gg}
  }

  4 {NEAR(q y, 20)} {
    1 {[yyyy] vvvvv [qq] oo [yyyyyy] vvvv eee}   {ffff uu r qq aaaa}
    2 {ww oooooo bbbbb ssssss mm}                {ffffff [yy] iiii rr s ccc [qqqqq]}
  }
} {
  if {![fts5_expr_ok $q t2]} continue

  do_execsql_test 6.1.$tn.asc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q
  } $res

  # Build the expected result for the descending scan by reversing the
  # order of the rows, three values per row, of the ascending result.
  set descRes [list]
  foreach {rowid a b} $res {
    set descRes [linsert $descRes 0 $rowid $a $b]
  }

  do_execsql_test 6.1.$tn.desc {
    SELECT rowid, highlight(t2, 0, '[', ']'), highlight(t2, 1, '[', ']')
    FROM t2 WHERE t2 MATCH $q ORDER BY rowid DESC
  } $descRes
}

do_execsql_test 6.2.1 {
  INSERT INTO t2(rowid, a, b) VALUES(13,
      'x xx xxx xxxx xxxxx xxxxxx xxxxxxx', 'y yy yyy yyyy yyyyy yyyyyy yyyyyyy'
  );
  SELECT rowid, highlight(t2, 0, '<', '>'), highlight(t2, 1, '(', ')')
  FROM t2 WHERE t2 MATCH 'x OR y'
} {
  1 {<yyyy> vvvvv qq oo <yyyyyy> vvvv eee}   {ffff uu r qq aaaa}
  2 {ww oooooo bbbbb ssssss mm}              {ffffff (yy) iiii rr s ccc qqqqq}
  4 {r f i rrrrrr ww hhh}                    {aa (yyy) t (x) aaaaa ii}
  13 {<x> <xx> <xxx> <xxxx> <xxxxx> <xxxxxx> <xxxxxxx>}
     {(y) (yy) (yyy) (yyyy) (yyyyy) (yyyyyy) (yyyyyyy)}
}

#-------------------------------------------------------------------------
# Test that the xColumnSize() API is not confused by colocated tokens.
#
reset_db
sqlite3_fts5_create_tokenizer db tcl tcl_create
fts5_aux_test_functions db

# Tokenizer for the 7.* tests. Every token is reported as-is. For each
# single-character token, nine more tokens are reported with -colo at the
# same offsets: the character repeated 2, 3, ... 10 times. Unlike the
# tokenizer of the 6.* tests this is done regardless of the flags value.
proc tcl_tokenize {tflags text} {
  foreach {tok iFrom iTo} [fts5_tokenize_split $text] {
    sqlite3_fts5_token $tok $iFrom $iTo
    if {[string length $tok]!=1} continue
    for {set nRep 2} {$nRep<=10} {incr nRep} {
      sqlite3_fts5_token -colo [string repeat $tok $nRep] $iFrom $iTo
    }
  }
}

# 7.0.*: table created with columnsize=1.
do_execsql_test 7.0.1 {
  CREATE VIRTUAL TABLE t1 USING fts5(
    a, b, columnsize=1, tokenize=tcl, detail=%DETAIL%
  );
  INSERT INTO t1 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t1 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t1) FROM t1 WHERE t1 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.0.2 {
  INSERT INTO t1(t1) VALUES('integrity-check');
}

# 7.1.*: the same data and query against a table created with
# columnsize=0.
do_execsql_test 7.1.1 {
  CREATE VIRTUAL TABLE t2 USING fts5(
    a, b, columnsize=0, tokenize=tcl, detail=%DETAIL%
  );
  INSERT INTO t2 VALUES('0 2 3', '4 5 6 7');
  INSERT INTO t2 VALUES('8 9', '0 0 0 0 0 0 0 0 0 0');
  SELECT fts5_test_columnsize(t2) FROM t2 WHERE t2 MATCH '000 AND 00 AND 0';
} {{3 4} {2 10}}

do_execsql_test 7.1.2 {
  INSERT INTO t2(t2) VALUES('integrity-check');
}

} ;# foreach_detail_mode

finish_test