1# 2012 March 01 2# 3# The author disclaims copyright to this source code. In place of 4# a legal notice, here is a blessing: 5# 6# May you do good and not evil. 7# May you find forgiveness for yourself and forgive others. 8# May you share freely, never taking more than you give. 9# 10#************************************************************************* 11# This file implements regression tests for SQLite library. The 12# focus of this script is testing the languageid=xxx FTS4 option. 13# 14 15set testdir [file dirname $argv0] 16source $testdir/tester.tcl 17set ::testprefix fts4content 18 19# If SQLITE_ENABLE_FTS3 is defined, omit this file. 20ifcapable !fts3 { 21 finish_test 22 return 23} 24 25set ::testprefix fts4langid 26 27#--------------------------------------------------------------------------- 28# Test plan: 29# 30# 1.* - Warm-body tests created for specific purposes during development. 31# Passing these doesn't really prove much. 32# 33# 2.1.* - Test that FTS queries only ever return rows associated with 34# the requested language. 35# 36# 2.2.* - Same as 2.1.*, after an 'optimize' command. 37# 38# 2.3.* - Same as 2.1.*, after a 'rebuild' command. 39# 40# 3.* - Tests with content= tables. Both where there is a real 41# underlying content table and where there is not. 42# 43# 4.* - Test that if one is provided, the tokenizer xLanguage method 44# is called to configure the tokenizer before tokenizing query 45# or document text. 46# 47# 5.* - Test the fts4aux table when the associated FTS4 table contains 48# multiple languages. 49# 50 51do_execsql_test 1.1 { 52 CREATE VIRTUAL TABLE t1 USING fts4(a, b, languageid=lang_id); 53} 54 55do_execsql_test 1.2 { 56 SELECT sql FROM sqlite_master WHERE name = 't1_content'; 57} {{CREATE TABLE 't1_content'(docid INTEGER PRIMARY KEY, 'c0a', 'c1b', langid)}} 58 59do_execsql_test 1.3 {SELECT docid FROM t1} {} 60do_execsql_test 1.4 {SELECT lang_id FROM t1} {} 61 62do_execsql_test 1.5 {INSERT INTO t1(a, b) VALUES('aaa', 'bbb')} 63do_execsql_test 1.6 {SELECT lang_id FROM t1 } {0} 64 65do_execsql_test 1.7 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 4)} 66do_execsql_test 1.8 {SELECT lang_id FROM t1 } {0 4} 67 68do_execsql_test 1.9 {INSERT INTO t1(a, b, lang_id) VALUES('aaa', 'bbb', 'xyz')} 69do_execsql_test 1.10 {SELECT lang_id FROM t1} {0 4 0} 70 71do_execsql_test 1.11 { 72 CREATE VIRTUAL TABLE t2 USING fts4; 73 INSERT INTO t2 VALUES('abc'); 74} 75do_execsql_test 1.12 { SELECT rowid FROM t2 WHERE content MATCH 'abc' } 1 76 77do_execsql_test 1.13 { 78 DROP TABLE t1; 79 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); 80 INSERT INTO t1(content) VALUES('a b c'); 81 INSERT INTO t1(content, lang_id) VALUES('a b c', 1); 82} 83 84do_execsql_test 1.14 { 85 SELECT rowid FROM t1 WHERE t1 MATCH 'b'; 86} {1} 87do_execsql_test 1.15 { 88 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 0; 89} {1} 90 91do_execsql_test 1.16 { 92 SELECT rowid FROM t1 WHERE t1 MATCH 'b' AND lang_id = 1; 93} {2} 94 95do_catchsql_test 1.17 { 96 INSERT INTO t1(content, lang_id) VALUES('123', -1); 97} {1 {constraint failed}} 98 99do_execsql_test 1.18 { 100 DROP TABLE t1; 101 CREATE VIRTUAL TABLE t1 USING fts4(languageid=lang_id); 102 INSERT INTO t1(content, lang_id) VALUES('A', 13); 103 INSERT INTO t1(content, lang_id) VALUES('B', 13); 104 INSERT INTO t1(content, lang_id) VALUES('C', 13); 105 INSERT INTO t1(content, lang_id) VALUES('D', 13); 106 INSERT INTO t1(content, lang_id) VALUES('E', 13); 107 INSERT INTO t1(content, lang_id) VALUES('F', 13); 108 INSERT INTO t1(content, lang_id) VALUES('G', 13); 109 INSERT INTO t1(content, lang_id) VALUES('H', 13); 110 INSERT INTO t1(content, lang_id) VALUES('I', 13); 111 INSERT INTO t1(content, lang_id) VALUES('J', 13); 112 INSERT INTO t1(content, lang_id) VALUES('K', 13); 113 INSERT INTO t1(content, lang_id) VALUES('L', 13); 114 INSERT INTO t1(content, lang_id) VALUES('M', 13); 115 INSERT INTO t1(content, lang_id) VALUES('N', 13); 116 INSERT INTO t1(content, lang_id) VALUES('O', 13); 117 INSERT INTO t1(content, lang_id) VALUES('P', 13); 118 INSERT INTO t1(content, lang_id) VALUES('Q', 13); 119 INSERT INTO t1(content, lang_id) VALUES('R', 13); 120 INSERT INTO t1(content, lang_id) VALUES('S', 13); 121 SELECT rowid FROM t1 WHERE t1 MATCH 'A'; 122} {} 123 124 125#------------------------------------------------------------------------- 126# Test cases 2.* 127# 128proc build_multilingual_db_1 {db} { 129 $db eval { CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l) } 130 131 set xwords [list zero one two three four five six seven eight nine ten] 132 set ywords [list alpha beta gamma delta epsilon zeta eta theta iota kappa] 133 134 for {set i 0} {$i < 1000} {incr i} { 135 set iLangid [expr $i%9] 136 set x "" 137 set y "" 138 139 set x [list] 140 lappend x [lindex $xwords [expr ($i / 1000) % 10]] 141 lappend x [lindex $xwords [expr ($i / 100) % 10]] 142 lappend x [lindex $xwords [expr ($i / 10) % 10]] 143 lappend x [lindex $xwords [expr ($i / 1) % 10]] 144 145 set y [list] 146 lappend y [lindex $ywords [expr ($i / 1000) % 10]] 147 lappend y [lindex $ywords [expr ($i / 100) % 10]] 148 lappend y [lindex $ywords [expr ($i / 10) % 10]] 149 lappend y [lindex $ywords [expr ($i / 1) % 10]] 150 151 $db eval { INSERT INTO t2(docid, x, y, l) VALUES($i, $x, $y, $iLangid) } 152 } 153 154 $db eval { 155 CREATE TABLE data(x, y, l); 156 INSERT INTO data(rowid, x, y, l) SELECT docid, x, y, l FROM t2; 157 } 158} 159 160proc rowid_list_set_langid {langid} { 161 set ::rowid_list_langid $langid 162} 163proc rowid_list {pattern} { 164 set langid $::rowid_list_langid 165 set res [list] 166 db eval {SELECT rowid, x, y FROM data WHERE l = $langid ORDER BY rowid ASC} { 167 if {[string match "*$pattern*" $x] || [string match "*$pattern*" $y]} { 168 lappend res $rowid 169 } 170 } 171 return $res 172} 173 174proc or_merge_list {list1 list2} { 175 set res [list] 176 177 set i1 0 178 set i2 0 179 180 set n1 [llength $list1] 181 set n2 [llength $list2] 182 183 while {$i1 < $n1 && $i2 < $n2} { 184 set e1 [lindex $list1 $i1] 185 set e2 [lindex $list2 $i2] 186 187 if {$e1==$e2} { 188 lappend res $e1 189 incr i1 190 incr i2 191 } elseif {$e1 < $e2} { 192 lappend res $e1 193 incr i1 194 } else { 195 lappend res $e2 196 incr i2 197 } 198 } 199 200 concat $res [lrange $list1 $i1 end] [lrange $list2 $i2 end] 201} 202 203proc or_merge_lists {args} { 204 set res [lindex $args 0] 205 for {set i 1} {$i < [llength $args]} {incr i} { 206 set res [or_merge_list $res [lindex $args $i]] 207 } 208 set res 209} 210 211proc and_merge_list {list1 list2} { 212 foreach i $list2 { set a($i) 1 } 213 set res [list] 214 foreach i $list1 { 215 if {[info exists a($i)]} {lappend res $i} 216 } 217 set res 218} 219 220 221proc and_merge_lists {args} { 222 set res [lindex $args 0] 223 for {set i 1} {$i < [llength $args]} {incr i} { 224 set res [and_merge_list $res [lindex $args $i]] 225 } 226 set res 227} 228 229proc filter_list {list langid} { 230 set res [list] 231 foreach i $list { 232 if {($i % 9) == $langid} {lappend res $i} 233 } 234 set res 235} 236 237do_test 2.0 { 238 reset_db 239 build_multilingual_db_1 db 240} {} 241 242proc do_test_query1 {tn query res_script} { 243 for {set langid 0} {$langid < 10} {incr langid} { 244 rowid_list_set_langid $langid 245 set res [eval $res_script] 246 247 set actual [ 248 execsql {SELECT docid FROM t2 WHERE t2 MATCH $query AND l = $langid} 249 ] 250 do_test $tn.$langid [list set {} $actual] $res 251 } 252} 253 254# Run some queries. 255do_test_query1 2.1.1 {delta} { rowid_list delta } 256do_test_query1 2.1.2 {"zero one two"} { rowid_list "zero one two" } 257do_test_query1 2.1.3 {zero one two} { 258 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 259} 260do_test_query1 2.1.4 {"zero one" OR "one two"} { 261 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 262} 263 264# Now try the same tests as above, but after running the 'optimize' 265# command on the FTS table. 266# 267do_execsql_test 2.2 { 268 INSERT INTO t2(t2) VALUES('optimize'); 269 SELECT count(*) FROM t2_segdir; 270} {9} 271do_test_query1 2.2.1 {delta} { rowid_list delta } 272do_test_query1 2.2.2 {"zero one two"} { rowid_list "zero one two" } 273do_test_query1 2.2.3 {zero one two} { 274 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 275} 276do_test_query1 2.2.4 {"zero one" OR "one two"} { 277 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 278} 279 280# And rebuild. 281# 282do_test 2.3 { 283 reset_db 284 build_multilingual_db_1 db 285 execsql { INSERT INTO t2(t2) VALUES('rebuild') } 286} {} 287do_test_query1 2.3.1 {delta} { rowid_list delta } 288do_test_query1 2.3.2 {"zero one two"} { rowid_list "zero one two" } 289do_test_query1 2.3.3 {zero one two} { 290 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 291} 292do_test_query1 2.3.4 {"zero one" OR "one two"} { 293 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 294} 295 296#------------------------------------------------------------------------- 297# Test cases 3.* 298# 299do_test 3.0 { 300 reset_db 301 build_multilingual_db_1 db 302 execsql { 303 CREATE TABLE t3_data(l, x, y); 304 INSERT INTO t3_data(rowid, l, x, y) SELECT docid, l, x, y FROM t2; 305 DROP TABLE t2; 306 } 307} {} 308do_execsql_test 3.1 { 309 CREATE VIRTUAL TABLE t2 USING fts4(content=t3_data, languageid=l); 310 INSERT INTO t2(t2) VALUES('rebuild'); 311} 312 313do_test_query1 3.1.1 {delta} { rowid_list delta } 314do_test_query1 3.1.2 {"zero one two"} { rowid_list "zero one two" } 315do_test_query1 3.1.3 {zero one two} { 316 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 317} 318do_test_query1 3.1.4 {"zero one" OR "one two"} { 319 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 320} 321 322do_execsql_test 3.2.1 { 323 DROP TABLE t2; 324 CREATE VIRTUAL TABLE t2 USING fts4(x, y, languageid=l, content=nosuchtable); 325} 326 327do_execsql_test 3.2.2 { 328 INSERT INTO t2(docid, x, y, l) SELECT rowid, x, y, l FROM t3_data; 329} 330 331do_execsql_test 3.2.3 { 332 DROP TABLE t3_data; 333} 334 335do_test_query1 3.3.1 {delta} { rowid_list delta } 336do_test_query1 3.3.2 {"zero one two"} { rowid_list "zero one two" } 337do_test_query1 3.3.3 {zero one two} { 338 and_merge_lists [rowid_list zero] [rowid_list one] [rowid_list two] 339} 340do_test_query1 3.3.4 {"zero one" OR "one two"} { 341 or_merge_lists [rowid_list "zero one"] [rowid_list "one two"] 342} 343 344#------------------------------------------------------------------------- 345# Test cases 4.* 346# 347proc build_multilingual_db_2 {db} { 348 $db eval { 349 CREATE VIRTUAL TABLE t4 USING fts4( 350 tokenize=testtokenizer, 351 languageid=lid 352 ); 353 } 354 for {set i 0} {$i < 50} {incr i} { 355 execsql { 356 INSERT INTO t4(docid, content, lid) VALUES($i, 'The Quick Brown Fox', $i) 357 } 358 } 359} 360 361ifcapable fts3_tokenizer { 362 do_test 4.1.0 { 363 reset_db 364 set ptr [fts3_test_tokenizer] 365 execsql { SELECT fts3_tokenizer('testtokenizer', $ptr) } 366 build_multilingual_db_2 db 367 } {} 368 do_execsql_test 4.1.1 { 369 SELECT docid FROM t4 WHERE t4 MATCH 'quick'; 370 } {0} 371 do_execsql_test 4.1.2 { 372 SELECT docid FROM t4 WHERE t4 MATCH 'quick' AND lid=1; 373 } {} 374 do_execsql_test 4.1.3 { 375 SELECT docid FROM t4 WHERE t4 MATCH 'Quick' AND lid=1; 376 } {1} 377 for {set i 0} {$i < 50} {incr i} { 378 do_execsql_test 4.1.4.$i { 379 SELECT count(*) FROM t4 WHERE t4 MATCH 'fox' AND lid=$i; 380 } [expr 0==($i%2)] 381 } 382 do_catchsql_test 4.1.5 { 383 INSERT INTO t4(content, lid) VALUES('hello world', 101) 384 } {1 {SQL logic error or missing database}} 385} 386 387#------------------------------------------------------------------------- 388# Test cases 5.* 389# 390# The following test cases are designed to detect a 32-bit overflow bug 391# that existed at one point. 392# 393proc build_multilingual_db_3 {db} { 394 $db eval { 395 CREATE VIRTUAL TABLE t5 USING fts4(languageid=lid); 396 } 397 set languages [list 0 1 2 [expr 1<<30]] 398 399 foreach lid $languages { 400 execsql { 401 INSERT INTO t5(docid, content, lid) VALUES( 402 $lid, 'My language is ' || $lid, $lid 403 ) 404 } 405 } 406} 407 408do_test 5.1.0 { 409 reset_db 410 build_multilingual_db_3 db 411} {} 412 413do_execsql_test 5.1.1 { 414 SELECT level FROM t5_segdir; 415} [list 0 1024 2048 [expr 1<<40]] 416 417do_execsql_test 5.1.2 {SELECT docid FROM t5 WHERE t5 MATCH 'language'} 0 418foreach langid [list 0 1 2 [expr 1<<30]] { 419 do_execsql_test 5.2.$langid { 420 SELECT docid FROM t5 WHERE t5 MATCH 'language' AND lid = $langid 421 } $langid 422} 423 424set lid [expr 1<<30] 425do_execsql_test 5.3.1 { 426 CREATE VIRTUAL TABLE t6 USING fts4(languageid=lid); 427 INSERT INTO t6 VALUES('I belong to language 0!'); 428} 429do_test 5.3.2 { 430 for {set i 0} {$i < 20} {incr i} { 431 execsql { 432 INSERT INTO t6(content, lid) VALUES( 433 'I (row '||$i||') belong to langauge N!', $lid 434 ); 435 } 436 } 437 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } 438} {1} 439 440do_test 5.3.3 { 441 execsql { SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid} 442} {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} 443 444do_execsql_test 5.3.4 { INSERT INTO t6(t6) VALUES('optimize') } {} 445do_execsql_test 5.3.5 { SELECT docid FROM t6 WHERE t6 MATCH 'belong' } {1} 446do_execsql_test 5.3.6 { 447 SELECT docid FROM t6 WHERE t6 MATCH 'belong' AND lid=$lid 448} {2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21} 449 450 451set lid [expr 1<<30] 452foreach lid [list 4 [expr 1<<30]] { 453 do_execsql_test 5.4.$lid.1 { 454 DELETE FROM t6; 455 SELECT count(*) FROM t6_segdir; 456 SELECT count(*) FROM t6_segments; 457 } {0 0} 458 do_execsql_test 5.4.$lid.2 { 459 INSERT INTO t6(content, lid) VALUES('zero zero zero', $lid); 460 INSERT INTO t6(content, lid) VALUES('zero zero one', $lid); 461 INSERT INTO t6(content, lid) VALUES('zero one zero', $lid); 462 INSERT INTO t6(content, lid) VALUES('zero one one', $lid); 463 INSERT INTO t6(content, lid) VALUES('one zero zero', $lid); 464 INSERT INTO t6(content, lid) VALUES('one zero one', $lid); 465 INSERT INTO t6(content, lid) VALUES('one one zero', $lid); 466 INSERT INTO t6(content, lid) VALUES('one one one', $lid); 467 468 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; 469 } {1 2 5} 470 471 do_execsql_test 5.4.$lid.3 { 472 SELECT count(*) FROM t6_segdir; 473 SELECT count(*) FROM t6_segments; 474 } {8 0} 475 476 do_execsql_test 5.4.$lid.4 { 477 INSERT INTO t6(t6) VALUES('merge=100,3'); 478 INSERT INTO t6(t6) VALUES('merge=100,3'); 479 SELECT docid FROM t6 WHERE t6 MATCH '"zero zero"' AND lid=$lid; 480 } {1 2 5} 481 482 do_execsql_test 5.4.$lid.5 { 483 SELECT count(*) FROM t6_segdir; 484 SELECT count(*) FROM t6_segments; 485 } {4 4} 486} 487finish_test 488