1# 2012 March 01 2# 3# The author disclaims copyright to this source code. In place of 4# a legal notice, here is a blessing: 5# 6# May you do good and not evil. 7# May you find forgiveness for yourself and forgive others. 8# May you share freely, never taking more than you give. 9# 10#************************************************************************* 11# This file implements regression tests for SQLite library. The 12# focus of this script is testing the languageid=xxx FTS4 option. 13# 14 15set testdir [file dirname $argv0] 16source $testdir/tester.tcl 17 18# If SQLITE_ENABLE_FTS3 is defined, omit this file. 19ifcapable !fts3 { 20 finish_test 21 return 22} 23 24set ::testprefix fts4langid 25 26#--------------------------------------------------------------------------- 27# Test plan: 28# 29# 1.* - Warm-body tests created for specific purposes during development. 30# Passing these doesn't really prove much. 31# 32# 2.1.* - Test that FTS queries only ever return rows associated with 33# the requested language. 34# 35# 2.2.* - Same as 2.1.*, after an 'optimize' command. 36# 37# 2.3.* - Same as 2.1.*, after a 'rebuild' command. 38# 39# 3.* - Tests with content= tables. Both where there is a real 40# underlying content table and where there is not. 41# 42# 4.* - Test that if one is provided, the tokenizer xLanguage method 43# is called to configure the tokenizer before tokenizing query 44# or document text. 45# 46# 5.* - Test the fts4aux table when the associated FTS4 table contains 47# multiple languages. 48# 49 50do_execsql_test 1.1 { 51 CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); 52} 53 54do_execsql_test 1.2 { 55 SELECT sql FROM sqlite_master WHERE name = 't1_content'; 56} {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}} 57 58do_execsql_test 1.3 {SELECT docid FROM t1} {} 59do_execsql_test 1.4 {SELECT lang_id FROM t1} {} 60 61do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')} 62do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0} 63 64do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)} 65do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4} 66 67do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')} 68do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0} 69 70do_execsql_test 1.11 { 71 CREATE VIRTUAL TABLE t2 USING fts4; 72 INSERT INTO t2 VALUES('abc'); 73} 74do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1 75 76do_execsql_test 1.13 { 77 DROP TABLE t1; 78 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); 79 INSERT INTO t1(content) VALUES('a b c'); 80 INSERT INTO t1(content, lang_id) VALUES('a b c', 1); 81} 82 83do_execsql_test 1.14 { 84 SELECT rowid FROM t1 WHERE t1 MATCH 'b'; 85} {1} 86do_execsql_test 1.15 { 87 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0; 88} {1} 89 90do_execsql_test 1.16 { 91 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1; 92} {2} 93 94do_catchsql_test 1.17 { 95 INSERT INTO t1(content, lang_id) VALUES('123', -1); 96} {1 {constraint failed}} 97 98do_execsql_test 1.18 { 99 DROP TABLE t1; 100 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); 101 INSERT INTO t1(content, lang_id) VALUES('A', 13); 102 INSERT INTO t1(content, lang_id) VALUES('B', 13); 103 INSERT INTO t1(content, lang_id) VALUES('C', 13); 104 INSERT INTO t1(content, lang_id) VALUES('D', 13); 105 INSERT INTO t1(content, lang_id) VALUES('E', 13); 106 INSERT INTO t1(content, lang_id) VALUES('F', 13); 107 INSERT INTO t1(content, lang_id) VALUES('G', 13); 108 INSERT INTO t1(content, lang_id) VALUES('H', 13); 109 INSERT INTO t1(content, lang_id) VALUES('I', 13); 110 INSERT INTO t1(content, lang_id) VALUES('J', 13); 111 INSERT INTO t1(content, lang_id) VALUES('K', 13); 112 INSERT INTO t1(content, lang_id) VALUES('L', 13); 113 INSERT INTO t1(content, lang_id) VALUES('M', 13); 114 INSERT INTO t1(content, lang_id) VALUES('N', 13); 115 INSERT INTO t1(content, lang_id) VALUES('O', 13); 116 INSERT INTO t1(content, lang_id) VALUES('P', 13); 117 INSERT INTO t1(content, lang_id) VALUES('Q', 13); 118 INSERT INTO t1(content, lang_id) VALUES('R', 13); 119 INSERT INTO t1(content, lang_id) VALUES('S', 13); 120 SELECT rowid FROM t1 WHERE t1 MATCH 'A'; 121} {} 122 123 124#------------------------------------------------------------------------- 125# Test cases 2.* 126# 127proc build_multilingual_db_1 {db} { 128 $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } 129 130 set xwords [list zero one two three four five six seven eight nine ten] 131 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] 132 133 for {set i 0} {$i < 1000} {incr i} { 134 set iLangid [expr $i%9] 135 set x "" 136 set y "" 137 138 set x [list] 139 lappend x [lindex $xwords [expr ($i / 1000) % 10]] 140 lappend x [lindex $xwords [expr ($i / 100) % 10]] 141 lappend x [lindex $xwords [expr ($i / 10) % 10]] 142 lappend x [lindex $xwords [expr ($i / 1) % 10]] 143 144 set y [list] 145 lappend y [lindex $ywords [expr ($i / 1000) % 10]] 146 lappend y [lindex $ywords [expr ($i / 100) % 10]] 147 lappend y [lindex $ywords [expr ($i / 10) % 10]] 148 lappend y [lindex $ywords [expr ($i / 1) % 10]] 149 150 $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } 151 } 152 153 $db eval { 154 CREATE TABLE data(x, y, l); 155 INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; 156 } 157} 158 159proc rowid_list_set_langid {langid} { 160 set ::rowid_list_langid $langid 161} 162proc rowid_list {pattern} { 163 set langid $::rowid_list_langid 164 set res [list] 165 db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { 166 if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { 167 lappend res $rowid 168 } 169 } 170 return $res 171} 172 173proc or_merge_list {list1 list2} { 174 set res [list] 175 176 set i1 0 177 set i2 0 178 179 set n1 [llength $list1] 180 set n2 [llength $list2] 181 182 while {$i1 < $n1 && $i2 < $n2} { 183 set e1 [lindex $list1 $i1] 184 set e2 [lindex $list2 $i2] 185 186 if {$e1==$e2} { 187 lappend res $e1 188 incr i1 189 incr i2 190 } elseif {$e1 < $e2} { 191 lappend res $e1 192 incr i1 193 } else { 194 lappend res $e2 195 incr i2 196 } 197 } 198 199 concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] 200} 201 202proc or_merge_lists {args} { 203 set res [lindex $args 0] 204 for {set i 1} {$i < [llength $args]} {incr i} { 205 set res [or_merge_list $res [lindex $args $i]] 206 } 207 set res 208} 209 210proc and_merge_list {list1 list2} { 211 foreach i $list2 { set a($i) 1 } 212 set res [list] 213 foreach i $list1 { 214 if {[info exists a($i)]} {lappend res $i} 215 } 216 set res 217} 218 219 220proc and_merge_lists {args} { 221 set res [lindex $args 0] 222 for {set i 1} {$i < [llength $args]} {incr i} { 223 set res [and_merge_list $res [lindex $args $i]] 224 } 225 set res 226} 227 228proc filter_list {list langid} { 229 set res [list] 230 foreach i $list { 231 if {($i % 9) == $langid} {lappend res $i} 232 } 233 set res 234} 235 236do_test 2.0 { 237 reset_db 238 build_multilingual_db_1 db 239} {} 240 241proc do_test_query1 {tn query res_script} { 242 for {set langid 0} {$langid < 10} {incr langid} { 243 rowid_list_set_langid $langid 244 set res [eval $res_script] 245 246 set actual [ 247 execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} 248 ] 249 do_test $tn.$langid [list set {} $actual] $res 250 } 251} 252 253# Run some queries. 254do_test_query1 2.1.1 {delta} { rowid_list delta } 255do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } 256do_test_query1 2.1.3 {zero one two} { 257 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 258} 259do_test_query1 2.1.4 {"zero one" OR "one two"} { 260 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 261} 262 263# Now try the same tests as above, but after running the 'optimize' 264# command on the FTS table. 265# 266do_execsql_test 2.2 { 267 INSERT INTO t2(t2) VALUES('optimize'); 268 SELECT count(*) FROM t2_segdir; 269} {9} 270do_test_query1 2.2.1 {delta} { rowid_list delta } 271do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } 272do_test_query1 2.2.3 {zero one two} { 273 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 274} 275do_test_query1 2.2.4 {"zero one" OR "one two"} { 276 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 277} 278 279# And rebuild. 280# 281do_test 2.3 { 282 reset_db 283 build_multilingual_db_1 db 284 execsql { INSERT INTO t2(t2) VALUES('rebuild') } 285} {} 286do_test_query1 2.3.1 {delta} { rowid_list delta } 287do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } 288do_test_query1 2.3.3 {zero one two} { 289 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 290} 291do_test_query1 2.3.4 {"zero one" OR "one two"} { 292 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 293} 294 295#------------------------------------------------------------------------- 296# Test cases 3.* 297# 298do_test 3.0 { 299 reset_db 300 build_multilingual_db_1 db 301 execsql { 302 CREATE TABLE t3_data(l, x, y); 303 INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; 304 DROP TABLE t2; 305 } 306} {} 307do_execsql_test 3.1 { 308 CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); 309 INSERT INTO t2(t2) VALUES('rebuild'); 310} 311 312do_test_query1 3.1.1 {delta} { rowid_list delta } 313do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } 314do_test_query1 3.1.3 {zero one two} { 315 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 316} 317do_test_query1 3.1.4 {"zero one" OR "one two"} { 318 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 319} 320 321do_execsql_test 3.2.1 { 322 DROP TABLE t2; 323 CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); 324} 325 326do_execsql_test 3.2.2 { 327 INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; 328} 329 330do_execsql_test 3.2.3 { 331 DROP TABLE t3_data; 332} 333 334do_test_query1 3.3.1 {delta} { rowid_list delta } 335do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } 336do_test_query1 3.3.3 {zero one two} { 337 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 338} 339do_test_query1 3.3.4 {"zero one" OR "one two"} { 340 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 341} 342 343do_execsql_test 3.4 { 344 CREATE TABLE t8c(a, b); 345 CREATE VIRTUAL TABLE t8 USING fts4(content=t8c, languageid=langid); 346 INSERT INTO t8(docid, a, b) VALUES(-1, 'one two three', 'x y z'); 347 SELECT docid FROM t8 WHERE t8 MATCH 'one x' AND langid=0 348} {-1} 349 350#------------------------------------------------------------------------- 351# Test cases 4.* 352# 353proc build_multilingual_db_2 {db} { 354 $db eval { 355 CREATE VIRTUAL TABLE t4 USING fts4( 356 tokenize=testtokenizer, 357 languageid=lid 358 ); 359 } 360 for {set i 0} {$i < 50} {incr i} { 361 execsql { 362 INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) 363 } 364 } 365} 366 367do_test 4.1.0 { 368 reset_db 369 set ptr [fts3_test_tokenizer] 370 sqlite3_db_config db SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER 1 371 execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) } 372 build_multilingual_db_2 db 373} {} 374do_execsql_test 4.1.1 { 375 SELECT docid FROM t4 WHERE t4 MATCH 'quick'; 376} {0} 377do_execsql_test 4.1.2 { 378 SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1; 379} {} 380do_execsql_test 4.1.3 { 381 SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1; 382} {1} 383for {set i 0} {$i < 50} {incr i} { 384 do_execsql_test 4.1.4.$i { 385 SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i; 386 } [expr 0==($i%2)] 387} 388do_catchsql_test 4.1.5 { 389 INSERT INTO t4(content, lid) VALUES('hello world', 101) 390} {1 {SQL logic error}} 391 392#------------------------------------------------------------------------- 393# Test cases 5.* 394# 395# The following test cases are designed to detect a 32-bit overflow bug 396# that existed at one point. 397# 398proc build_multilingual_db_3 {db} { 399 $db eval { 400 CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid); 401 } 402 set languages [list 0 1 2 [expr 1<<30]] 403 404 foreach lid $languages { 405 execsql { 406 INSERT INTO t5(docid, content, lid) VALUES( 407 $lid, 'My language is ' || $lid, $lid 408 ) 409 } 410 } 411} 412 413do_test 5.1.0 { 414 reset_db 415 build_multilingual_db_3 db 416} {} 417 418do_execsql_test 5.1.1 { 419 SELECT level FROM t5_segdir; 420} [list 0 1024 2048 [expr 1<<40]] 421 422do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0 423foreach langid [list 0 1 2 [expr 1<<30]] { 424 do_execsql_test 5.2.$langid { 425 SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid 426 } $langid 427} 428 429set lid [expr 1<<30] 430do_execsql_test 5.3.1 { 431 CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid); 432 INSERT INTO t6 VALUES('I belong to language 0!'); 433} 434do_test 5.3.2 { 435 for {set i 0} {$i < 20} {incr i} { 436 execsql { 437 INSERT INTO t6(content, lid) VALUES( 438 'I (row '||$i||') belong to langauge N!', $lid 439 ); 440 } 441 } 442 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } 443} {1} 444 445do_test 5.3.3 { 446 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid} 447} {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} 448 449do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {} 450do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1} 451do_execsql_test 5.3.6 { 452 SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid 453} {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} 454 455 456set lid [expr 1<<30] 457foreach lid [list 4 [expr 1<<30]] { 458 do_execsql_test 5.4.$lid.1 { 459 DELETE FROM t6; 460 SELECT count(*) FROM t6_segdir; 461 SELECT count(*) FROM t6_segments; 462 } {0 0} 463 do_execsql_test 5.4.$lid.2 { 464 INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid); 465 INSERT INTO t6(content, lid) VALUES('zero zero one', $lid); 466 INSERT INTO t6(content, lid) VALUES('zero one zero', $lid); 467 INSERT INTO t6(content, lid) VALUES('zero one one', $lid); 468 INSERT INTO t6(content, lid) VALUES('one zero zero', $lid); 469 INSERT INTO t6(content, lid) VALUES('one zero one', $lid); 470 INSERT INTO t6(content, lid) VALUES('one one zero', $lid); 471 INSERT INTO t6(content, lid) VALUES('one one one', $lid); 472 473 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; 474 } {1 2 5} 475 476 do_execsql_test 5.4.$lid.3 { 477 SELECT count(*) FROM t6_segdir; 478 SELECT count(*) FROM t6_segments; 479 } {8 0} 480 481 do_execsql_test 5.4.$lid.4 { 482 INSERT INTO t6(t6) VALUES('merge=100,3'); 483 INSERT INTO t6(t6) VALUES('merge=100,3'); 484 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; 485 } {1 2 5} 486 487 do_execsql_test 5.4.$lid.5 { 488 SELECT count(*) FROM t6_segdir; 489 SELECT count(*) FROM t6_segments; 490 } {1 2} 491} 492 493reset_db 494do_execsql_test 6.0 { 495 CREATE VIRTUAL TABLE vt0 USING fts4(c0, languageid="lid"); 496 INSERT INTO vt0 VALUES ('a'), ('b'); 497 BEGIN; 498 UPDATE vt0 SET lid = 1 WHERE lid=0; 499} 500do_execsql_test 6.1 { 501 INSERT INTO vt0(vt0) VALUES('integrity-check'); 502} 503do_execsql_test 6.2 { 504 COMMIT; 505 INSERT INTO vt0(vt0) VALUES('integrity-check'); 506} 507finish_test 508