# 2012 May 25
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#*************************************************************************
#
# The tests in this file focus on testing the "unicode" FTS tokenizer.
#
# All non-ASCII characters in this script are written as \uXXXX escapes so
# that the tests do not depend on the encoding used to read the file.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl
ifcapable !fts3 { finish_test ; return }
set ::testprefix fts4unicode

# do_unicode_token_test TN INPUT RES
#
#   Run INPUT through fts3_tokenizer_test('unicode61', ...) and check that
#   the single value returned by the query is the list RES.
#
#   TN     test name, passed straight to do_execsql_test
#   INPUT  text to tokenize; single quotes are doubled so that the text can
#          be embedded in an SQL string literal
#   RES    expected tokenizer output.  As the expectations below show, it is
#          a flat list of triples: index, token as emitted, original text.
#
#   The test is issued with [uplevel] so that it runs in the caller's scope.
#
proc do_unicode_token_test {tn input res} {
  set input [string map {' ''} $input]
  # {*}$res re-lists RES so that multi-line expectations compare equal; the
  # outer [list] wraps it as the one column of the one row returned.
  uplevel [list do_execsql_test $tn "
    SELECT fts3_tokenizer_test('unicode61', '$input');
  " [list [list {*}$res]]]
}

do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}

# Upper-case non-ASCII letters must be folded to lower case, both on their
# own (1.1) and inside a longer token (1.2).
#
# NOTE(review): these two tests used raw non-ASCII bytes that were no longer
# readable in the copy this was edited from.  A/O/U WITH DIAERESIS (the same
# code points used by the map() table further down) were chosen as the
# replacement - confirm against the upstream file.
do_unicode_token_test 1.1 "\u00C4 \u00D6 \u00DC" \
    "0 \u00E4 \u00C4 1 \u00F6 \u00D6 2 \u00FC \u00DC"
do_unicode_token_test 1.2 "x\u00C4x x\u00D6x x\u00DCx" \
    "0 x\u00E4x x\u00C4x 1 x\u00F6x x\u00D6x 2 x\u00FCx x\u00DCx"

# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
#
# Test 1.4 used to spell the expected token as a raw byte.  With the escape
# it asserts the same thing as 1.5; both are kept so that the test numbering
# stays stable.
do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E"
do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E"

do_unicode_token_test 1.6 "The quick brown fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}
# Same expectation as 1.6: U+00BF, U+224E and U+2263 are expected to
# separate tokens just like the spaces above.
do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" {
  0 the The 1 quick quick 2 brown brown 3 fox fox
}

#-------------------------------------------------------------------------
# Test cases 2.*: full-text queries against documents in which selected
# ASCII letters have been replaced by their WITH DIAERESIS counterparts.
#
set docs [list {
  Enhance the INSERT syntax to allow multiple rows to be inserted via the
  VALUES clause.
} {
  Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
} {
  Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
} {
  Added the sqlite3_db_readonly() interface.
} {
  Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
  ability to add new PRAGMA statements or to override built-in PRAGMAs.
} {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
} {
  Added support for the FTS4 languageid option.
} {
  Documented support for the FTS4 content option. This feature has actually
  been in the code since version 3.7.9 but is only now considered to be
  officially supported.
} {
  Pending statements no longer block ROLLBACK. Instead, the pending statement
  will return SQLITE_ABORT upon next access after the ROLLBACK.
} {
  Improvements to the handling of CSV inputs in the command-line shell
} {
  Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
  incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
  connected by OR.
}]

# map(<letter>) is a two element list: the upper-case and the lower-case
# form of that letter WITH DIAERESIS.
set map(a) [list "\u00C4" "\u00E4"]  ; # LATIN LETTER A WITH DIAERESIS
set map(e) [list "\u00CB" "\u00EB"]  ; # LATIN LETTER E WITH DIAERESIS
set map(i) [list "\u00CF" "\u00EF"]  ; # LATIN LETTER I WITH DIAERESIS
set map(o) [list "\u00D6" "\u00F6"]  ; # LATIN LETTER O WITH DIAERESIS
set map(u) [list "\u00DC" "\u00FC"]  ; # LATIN LETTER U WITH DIAERESIS
set map(y) [list "\u0178" "\u00FF"]  ; # LATIN LETTER Y WITH DIAERESIS
set map(h) [list "\u1E26" "\u1E27"]  ; # LATIN LETTER H WITH DIAERESIS
set map(w) [list "\u1E84" "\u1E85"]  ; # LATIN LETTER W WITH DIAERESIS
set map(x) [list "\u1E8C" "\u1E8D"]  ; # LATIN LETTER X WITH DIAERESIS

# Flatten map() into the key/value list expected by [string map]: each
# upper-case ASCII letter maps to the upper-case accented form and each
# lower-case letter to the lower-case one.  Start from an empty list so that
# a pre-existing value of "mappings" cannot leak into the table.
set mappings [list]
foreach k [array names map] {
  lappend mappings [string toupper $k] [lindex $map($k) 0]
  lappend mappings $k                  [lindex $map($k) 1]
}

# mapdoc DOC
#
#   Return DOC with every run of whitespace collapsed to a single space,
#   leading and trailing whitespace removed, and the letters listed in
#   ::mappings replaced by their accented counterparts.
#
proc mapdoc {doc} {
  set doc [regsub -all {[[:space:]]+} $doc " "]
  string map $::mappings [string trim $doc]
}

# Populate the table with the mapped form of each document.
do_test 2.0 {
  execsql { CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61, x); }
  foreach doc $docs {
    set d [mapdoc $doc]
    execsql { INSERT INTO t2 VALUES($d) }
  }
} {}

# A mapped query term must find the (mapped) document that contains it.
do_test 2.1 {
  set q [mapdoc "row"]
  execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
} [list [mapdoc {
  Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
  the same row that contains the maximum x value.
}]]

# Each entry is: test number, query, expected snippet() output.  Both the
# query and the expected snippet are passed through [mapdoc] before use.
# The queries differ from the document text in letter case (3, 4, 5) or are
# prefix queries (6).
foreach {tn query snippet} {
  2 "row" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  3 "ROW" {
     ...returns the value of y on the same [row] that contains
     the maximum x value.
  }
  4 "rollback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  5 "rOllback" {
     ...[ROLLBACK]. Instead, the pending statement
     will return SQLITE_ABORT upon next access after the [ROLLBACK].
  }
  6 "lang*" {
     Added support for the FTS4 [languageid] option.
  }
} {
  do_test 2.$tn {
    set q [mapdoc $query]
    execsql { SELECT snippet(t2, '[', ']', '...') FROM t2 WHERE t2 MATCH $q }
  } [list [mapdoc $snippet]]
}

finish_test
