# 2014 Dec 20
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
#
# Tests focusing on the fts5 tokenizers
#
# The fts5_fold() and fts5_isalnum() SQL functions are compared, code point
# by code point, against Tcl reference implementations (tcl_fold and
# tcl_isalnum) that are driven by tables loaded from CaseFolding.txt and
# UnicodeData.txt.
#

source [file join [file dirname [info script]] fts5_common.tcl]

# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  finish_test
  return
}

# Return the path of $file inside the fts3 "unicode" directory, built
# relative to the directory that contains this script.
proc fts3_unicode_path {file} {
  file join .. [file dirname [info script]] .. .. fts3 unicode $file
}

# NOTE(review): the tl_*, an_* and rd_* helpers used below are not defined in
# this file; presumably they come from parseunicode.tcl - confirm.
source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]

# NOTE(review): tcl_fold reads the global array tl_lookup_table, which is
# never assigned in this file; presumably this call populates it - confirm.
tl_load_casefolding_txt $CF

# Every value returned by an_load_unicodedata_text is recorded as a key of
# aNotAlnum. tcl_isalnum treats any value with such a key as "not alnum".
foreach x [an_load_unicodedata_text $UD] {
  set aNotAlnum($x) 1
}

# Each element returned by rd_load_unicodedata_text is unpacked as the
# triple {code ascii f}. aDiacritic($code,$f) is set to the value that
# [binary scan ... c] reads from the first byte of $ascii, or to 0 if $ascii
# is empty. Entries with f==0 are stored under both the ($code,0) and
# ($code,1) keys.
foreach {y} [rd_load_unicodedata_text $UD] {
  foreach {code ascii f} $y {}
  if {$ascii==""} {
    set int 0
  } else {
    binary scan $ascii c int
  }
  set aDiacritic($code,$f) $int
  if {$f==0} { set aDiacritic($code,1) $int }
}

# Tcl reference implementation that fts5_fold() is compared against.
#
#   i                 Value to fold (the tests pass integers).
#   bRemoveDiacritic  Optional, default 0. If non-zero, the result of the
#                     tl_lookup_table step is additionally mapped through
#                     aDiacritic. The value 2 selects the ($i,1) entries,
#                     any other non-zero value selects the ($i,0) entries.
#
# Returns the mapped value, passed through [expr] so that numeric values
# are returned in normalized form.
proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic

  # Expressions are braced so that argument text is never re-parsed as a
  # Tcl expression. This proc is callable from SQL via [db func].
  set f [expr {$bRemoveDiacritic==2}]

  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i,$f)]} {
    set i $aDiacritic($i,$f)
  }
  expr {$i}
}
db func tcl_fold tcl_fold

# Tcl reference implementation that fts5_isalnum() is compared against.
# Returns 1 unless $i is a key of the aNotAlnum array, in which case it
# returns 0.
proc tcl_isalnum {i} {
  global aNotAlnum
  expr {![info exists aNotAlnum($i)]}
}
db func tcl_isalnum tcl_isalnum


#-------------------------------------------------------------------------
# 1.0.*: Calling fts5_isalnum() or fts5_fold() with an unsupported number
# of arguments is an error.
#
do_catchsql_test 1.0.1 {
  SELECT fts5_isalnum(1, 2, 3);
} {1 {wrong number of arguments to function fts5_isalnum}}
do_catchsql_test 1.0.2 {
  SELECT fts5_fold();
} {1 {wrong number of arguments to function fts5_fold}}
do_catchsql_test 1.0.3 {
  SELECT fts5_fold(1,2,3);
} {1 {wrong number of arguments to function fts5_fold}}

#-------------------------------------------------------------------------
# 1.1 - 1.3: For every integer from -1 to 100000 inclusive, the SQL
# function must agree with its Tcl reference implementation. Each query
# counts the mismatches, so the expected result is a count of 0 and a
# NULL min(i).
#
do_execsql_test 1.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}

do_execsql_test 1.2.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}

do_execsql_test 1.2.2 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_fold(i,2)!=CAST(tcl_fold(i,2) AS int);
} {0 {}}

do_execsql_test 1.3 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
} {0 {}}

#-------------------------------------------------------------------------
# 1.4 - 1.5: Create fts5 tables whose unicode61 "separators" (1.4) and
# "tokenchars" (1.5) options list every code point from 700 to 899
# inclusive. Each CREATE VIRTUAL TABLE is expected to return nothing.
#
do_test 1.4 {
  set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=}
  append str {"unicode61 separators '}
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}
do_test 1.5 {
  set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=}
  append str {"unicode61 tokenchars '}
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}


finish_test