1# 2002 May 24 2# 3# The author disclaims copyright to this source code. In place of 4# a legal notice, here is a blessing: 5# 6# May you do good and not evil. 7# May you find forgiveness for yourself and forgive others. 8# May you share freely, never taking more than you give. 9# 10#*********************************************************************** 11# This file implements regression tests for SQLite library. The focus of 12# this file is testing the SQLite routines used for converting between the 13# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and 14# UTF-16be). 15# 16# $Id: enc2.test,v 1.25 2006/01/03 00:33:50 drh Exp $ 17 18set testdir [file dirname $argv0] 19source $testdir/tester.tcl 20 21# If UTF16 support is disabled, ignore the tests in this file 22# 23ifcapable {!utf16} { 24 finish_test 25 return 26} 27 28# The rough organisation of tests in this file is: 29# 30# enc2.1.*: Simple tests with a UTF-8 db. 31# enc2.2.*: Simple tests with a UTF-16LE db. 32# enc2.3.*: Simple tests with a UTF-16BE db. 33# enc2.4.*: Test that attached databases must have the same text encoding 34# as the main database. 35# enc2.5.*: Test the behaviour of the library when a collation sequence is 36# not available for the most desirable text encoding. 37# enc2.6.*: Similar test for user functions. 38# enc2.7.*: Test that the VerifyCookie opcode protects against assuming the 39# wrong text encoding for the database. 40# enc2.8.*: Test sqlite3_complete16() 41# 42 43db close 44 45# Return the UTF-8 representation of the supplied UTF-16 string $str. 46proc utf8 {str} { 47 # If $str ends in two 0x00 0x00 bytes, knock these off before 48 # converting to UTF-8 using TCL. 49 binary scan $str \c* vals 50 if {[lindex $vals end]==0 && [lindex $vals end-1]==0} { 51 set str [binary format \c* [lrange $vals 0 end-2]] 52 } 53 54 set r [encoding convertfrom unicode $str] 55 return $r 56} 57 58# 59# This proc contains all the tests in this file. It is run 60# three times. Each time the file 'test.db' contains a database 61# with the following contents: 62set dbcontents { 63 CREATE TABLE t1(a PRIMARY KEY, b, c); 64 INSERT INTO t1 VALUES('one', 'I', 1); 65} 66# This proc tests that we can open and manipulate the test.db 67# database, and that it is possible to retreive values in 68# various text encodings. 69# 70proc run_test_script {t enc} { 71 72# Open the database and pull out a (the) row. 73do_test $t.1 { 74 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 75 execsql {SELECT * FROM t1} 76} {one I 1} 77 78# Insert some data 79do_test $t.2 { 80 execsql {INSERT INTO t1 VALUES('two', 'II', 2);} 81 execsql {SELECT * FROM t1} 82} {one I 1 two II 2} 83 84# Insert some data 85do_test $t.3 { 86 execsql { 87 INSERT INTO t1 VALUES('three','III',3); 88 INSERT INTO t1 VALUES('four','IV',4); 89 INSERT INTO t1 VALUES('five','V',5); 90 } 91 execsql {SELECT * FROM t1} 92} {one I 1 two II 2 three III 3 four IV 4 five V 5} 93 94# Use the index 95do_test $t.4 { 96 execsql { 97 SELECT * FROM t1 WHERE a = 'one'; 98 } 99} {one I 1} 100do_test $t.5 { 101 execsql { 102 SELECT * FROM t1 WHERE a = 'four'; 103 } 104} {four IV 4} 105ifcapable subquery { 106 do_test $t.6 { 107 execsql { 108 SELECT * FROM t1 WHERE a IN ('one', 'two'); 109 } 110 } {one I 1 two II 2} 111} 112 113# Now check that we can retrieve data in both UTF-16 and UTF-8 114do_test $t.7 { 115 set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL] 116 sqlite3_step $STMT 117 sqlite3_column_text $STMT 0 118} {four} 119 120do_test $t.8 { 121 sqlite3_step $STMT 122 utf8 [sqlite3_column_text16 $STMT 0] 123} {five} 124 125do_test $t.9 { 126 sqlite3_finalize $STMT 127} SQLITE_OK 128 129ifcapable vacuum { 130 execsql VACUUM 131} 132 133do_test $t.10 { 134 db eval {PRAGMA encoding} 135} $enc 136 137} 138 139# The three unicode encodings understood by SQLite. 140set encodings [list UTF-8 UTF-16le UTF-16be] 141 142set sqlite_os_trace 0 143set i 1 144foreach enc $encodings { 145 file delete -force test.db 146 sqlite3 db test.db 147 db eval "PRAGMA encoding = \"$enc\"" 148 execsql $dbcontents 149 db close 150 run_test_script enc2-$i $enc 151 db close 152 incr i 153} 154 155# Test that it is an error to try to attach a database with a different 156# encoding to the main database. 157do_test enc2-4.1 { 158 file delete -force test.db 159 sqlite3 db test.db 160 db eval "PRAGMA encoding = 'UTF-8'" 161 db eval "CREATE TABLE abc(a, b, c);" 162} {} 163do_test enc2-4.2 { 164 file delete -force test2.db 165 sqlite3 db2 test2.db 166 db2 eval "PRAGMA encoding = 'UTF-16'" 167 db2 eval "CREATE TABLE abc(a, b, c);" 168} {} 169do_test enc2-4.3 { 170 catchsql { 171 ATTACH 'test2.db' as aux; 172 } 173} {1 {attached databases must use the same text encoding as main database}} 174 175db2 close 176db close 177 178# The following tests - enc2-5.* - test that SQLite selects the correct 179# collation sequence when more than one is available. 180 181set ::values [list one two three four five] 182set ::test_collate_enc INVALID 183proc test_collate {enc lhs rhs} { 184 set ::test_collate_enc $enc 185 set l [lsearch -exact $::values $lhs] 186 set r [lsearch -exact $::values $rhs] 187 set res [expr $l - $r] 188 # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res" 189 return $res 190} 191 192file delete -force test.db 193sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 194do_test enc2-5.0 { 195 execsql { 196 CREATE TABLE t5(a); 197 INSERT INTO t5 VALUES('one'); 198 INSERT INTO t5 VALUES('two'); 199 INSERT INTO t5 VALUES('five'); 200 INSERT INTO t5 VALUES('three'); 201 INSERT INTO t5 VALUES('four'); 202 } 203} {} 204do_test enc2-5.1 { 205 add_test_collate $DB 1 1 1 206 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}] 207 lappend res $::test_collate_enc 208} {one two three four five UTF-8} 209do_test enc2-5.2 { 210 add_test_collate $DB 0 1 0 211 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 212 lappend res $::test_collate_enc 213} {one two three four five UTF-16LE} 214do_test enc2-5.3 { 215 add_test_collate $DB 0 0 1 216 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 217 lappend res $::test_collate_enc 218} {one two three four five UTF-16BE} 219 220db close 221file delete -force test.db 222sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 223execsql {pragma encoding = 'UTF-16LE'} 224do_test enc2-5.4 { 225 execsql { 226 CREATE TABLE t5(a); 227 INSERT INTO t5 VALUES('one'); 228 INSERT INTO t5 VALUES('two'); 229 INSERT INTO t5 VALUES('five'); 230 INSERT INTO t5 VALUES('three'); 231 INSERT INTO t5 VALUES('four'); 232 } 233} {} 234do_test enc2-5.5 { 235 add_test_collate $DB 1 1 1 236 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 237 lappend res $::test_collate_enc 238} {one two three four five UTF-16LE} 239do_test enc2-5.6 { 240 add_test_collate $DB 1 0 1 241 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 242 lappend res $::test_collate_enc 243} {one two three four five UTF-16BE} 244do_test enc2-5.7 { 245 add_test_collate $DB 1 0 0 246 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 247 lappend res $::test_collate_enc 248} {one two three four five UTF-8} 249 250db close 251file delete -force test.db 252sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 253execsql {pragma encoding = 'UTF-16BE'} 254do_test enc2-5.8 { 255 execsql { 256 CREATE TABLE t5(a); 257 INSERT INTO t5 VALUES('one'); 258 INSERT INTO t5 VALUES('two'); 259 INSERT INTO t5 VALUES('five'); 260 INSERT INTO t5 VALUES('three'); 261 INSERT INTO t5 VALUES('four'); 262 } 263} {} 264do_test enc2-5.9 { 265 add_test_collate $DB 1 1 1 266 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 267 lappend res $::test_collate_enc 268} {one two three four five UTF-16BE} 269do_test enc2-5.10 { 270 add_test_collate $DB 1 1 0 271 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 272 lappend res $::test_collate_enc 273} {one two three four five UTF-16LE} 274do_test enc2-5.11 { 275 add_test_collate $DB 1 0 0 276 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 277 lappend res $::test_collate_enc 278} {one two three four five UTF-8} 279 280# Also test that a UTF-16 collation factory works. 281do_test enc2-5-12 { 282 add_test_collate $DB 0 0 0 283 catchsql { 284 SELECT * FROM t5 ORDER BY 1 COLLATE test_collate 285 } 286} {1 {no such collation sequence: test_collate}} 287do_test enc2-5.13 { 288 add_test_collate_needed $DB 289 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }] 290 lappend res $::test_collate_enc 291} {one two three four five UTF-16BE} 292do_test enc2-5.14 { 293 set ::sqlite_last_needed_collation 294} test_collate 295 296db close 297file delete -force test.db 298 299do_test enc2-5.15 { 300 sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db] 301 add_test_collate_needed $::DB 302 set ::sqlite_last_needed_collation 303} {} 304do_test enc2-5.16 { 305 execsql {CREATE TABLE t1(a varchar collate test_collate);} 306} {} 307do_test enc2-5.17 { 308 set ::sqlite_last_needed_collation 309} {test_collate} 310 311# The following tests - enc2-6.* - test that SQLite selects the correct 312# user function when more than one is available. 313 314proc test_function {enc arg} { 315 return "$enc $arg" 316} 317 318file delete -force test.db 319sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 320execsql {pragma encoding = 'UTF-8'} 321do_test enc2-6.0 { 322 execsql { 323 CREATE TABLE t5(a); 324 INSERT INTO t5 VALUES('one'); 325 } 326} {} 327do_test enc2-6.1 { 328 add_test_function $DB 1 1 1 329 execsql { 330 SELECT test_function('sqlite') 331 } 332} {{UTF-8 sqlite}} 333db close 334sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 335do_test enc2-6.2 { 336 add_test_function $DB 0 1 0 337 execsql { 338 SELECT test_function('sqlite') 339 } 340} {{UTF-16LE sqlite}} 341db close 342sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 343do_test enc2-6.3 { 344 add_test_function $DB 0 0 1 345 execsql { 346 SELECT test_function('sqlite') 347 } 348} {{UTF-16BE sqlite}} 349 350db close 351file delete -force test.db 352sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 353execsql {pragma encoding = 'UTF-16LE'} 354do_test enc2-6.3 { 355 execsql { 356 CREATE TABLE t5(a); 357 INSERT INTO t5 VALUES('sqlite'); 358 } 359} {} 360do_test enc2-6.4 { 361 add_test_function $DB 1 1 1 362 execsql { 363 SELECT test_function('sqlite') 364 } 365} {{UTF-16LE sqlite}} 366db close 367sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 368do_test enc2-6.5 { 369 add_test_function $DB 0 1 0 370 execsql { 371 SELECT test_function('sqlite') 372 } 373} {{UTF-16LE sqlite}} 374db close 375sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 376do_test enc2-6.6 { 377 add_test_function $DB 0 0 1 378 execsql { 379 SELECT test_function('sqlite') 380 } 381} {{UTF-16BE sqlite}} 382 383db close 384file delete -force test.db 385sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 386execsql {pragma encoding = 'UTF-16BE'} 387do_test enc2-6.7 { 388 execsql { 389 CREATE TABLE t5(a); 390 INSERT INTO t5 VALUES('sqlite'); 391 } 392} {} 393do_test enc2-6.8 { 394 add_test_function $DB 1 1 1 395 execsql { 396 SELECT test_function('sqlite') 397 } 398} {{UTF-16BE sqlite}} 399db close 400sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 401do_test enc2-6.9 { 402 add_test_function $DB 0 1 0 403 execsql { 404 SELECT test_function('sqlite') 405 } 406} {{UTF-16LE sqlite}} 407db close 408sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 409do_test enc2-6.10 { 410 add_test_function $DB 0 0 1 411 execsql { 412 SELECT test_function('sqlite') 413 } 414} {{UTF-16BE sqlite}} 415 416 417db close 418file delete -force test.db 419 420# The following tests - enc2-7.* - function as follows: 421# 422# 1: Open an empty database file assuming UTF-16 encoding. 423# 2: Open the same database with a different handle assuming UTF-8. Create 424# a table using this handle. 425# 3: Read the sqlite_master table from the first handle. 426# 4: Ensure the first handle recognises the database encoding is UTF-8. 427# 428do_test enc2-7.1 { 429 sqlite3 db test.db 430 execsql { 431 PRAGMA encoding = 'UTF-16'; 432 SELECT * FROM sqlite_master; 433 } 434} {} 435do_test enc2-7.2 { 436 set enc [execsql { 437 PRAGMA encoding; 438 }] 439 string range $enc 0 end-2 ;# Chop off the "le" or "be" 440} {UTF-16} 441do_test enc2-7.3 { 442 sqlite3 db2 test.db 443 execsql { 444 PRAGMA encoding = 'UTF-8'; 445 CREATE TABLE abc(a, b, c); 446 } db2 447} {} 448do_test enc2-7.4 { 449 execsql { 450 SELECT * FROM sqlite_master; 451 } 452} "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}" 453do_test enc2-7.5 { 454 execsql { 455 PRAGMA encoding; 456 } 457} {UTF-8} 458 459db close 460db2 close 461 462proc utf16 {utf8} { 463 set utf16 [encoding convertto unicode $utf8] 464 append utf16 "\x00\x00" 465 return $utf16 466} 467ifcapable {complete} { 468 do_test enc2-8.1 { 469 sqlite3_complete16 [utf16 "SELECT * FROM t1;"] 470 } {1} 471 do_test enc2-8.2 { 472 sqlite3_complete16 [utf16 "SELECT * FROM"] 473 } {0} 474} 475 476finish_test 477