1# 2002 May 24 2# 3# The author disclaims copyright to this source code. In place of 4# a legal notice, here is a blessing: 5# 6# May you do good and not evil. 7# May you find forgiveness for yourself and forgive others. 8# May you share freely, never taking more than you give. 9# 10#*********************************************************************** 11# This file implements regression tests for SQLite library. The focus of 12# this file is testing the SQLite routines used for converting between the 13# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and 14# UTF-16be). 15# 16 17set testdir [file dirname $argv0] 18source $testdir/tester.tcl 19 20# If UTF16 support is disabled, ignore the tests in this file 21# 22ifcapable {!utf16} { 23 finish_test 24 return 25} 26 27# The rough organisation of tests in this file is: 28# 29# enc2.1.*: Simple tests with a UTF-8 db. 30# enc2.2.*: Simple tests with a UTF-16LE db. 31# enc2.3.*: Simple tests with a UTF-16BE db. 32# enc2.4.*: Test that attached databases must have the same text encoding 33# as the main database. 34# enc2.5.*: Test the behavior of the library when a collation sequence is 35# not available for the most desirable text encoding. 36# enc2.6.*: Similar test for user functions. 37# enc2.7.*: Test that the VerifyCookie opcode protects against assuming the 38# wrong text encoding for the database. 39# enc2.8.*: Test sqlite3_complete16() 40# 41 42db close 43 44# Return the UTF-8 representation of the supplied UTF-16 string $str. 45proc utf8 {str} { 46 # If $str ends in two 0x00 0x00 bytes, knock these off before 47 # converting to UTF-8 using TCL. 48 binary scan $str \c* vals 49 if {[lindex $vals end]==0 && [lindex $vals end-1]==0} { 50 set str [binary format \c* [lrange $vals 0 end-2]] 51 } 52 53 set r [encoding convertfrom unicode $str] 54 return $r 55} 56 57# 58# This proc contains all the tests in this file. It is run 59# three times. Each time the file 'test.db' contains a database 60# with the following contents: 61set dbcontents { 62 CREATE TABLE t1(a PRIMARY KEY, b, c); 63 INSERT INTO t1 VALUES('one', 'I', 1); 64} 65# This proc tests that we can open and manipulate the test.db 66# database, and that it is possible to retreive values in 67# various text encodings. 68# 69proc run_test_script {t enc} { 70 71# Open the database and pull out a (the) row. 72do_test $t.1 { 73 sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 74 execsql {SELECT * FROM t1} 75} {one I 1} 76 77# Insert some data 78do_test $t.2 { 79 execsql {INSERT INTO t1 VALUES('two', 'II', 2);} 80 execsql {SELECT * FROM t1} 81} {one I 1 two II 2} 82 83# Insert some data 84do_test $t.3 { 85 execsql { 86 INSERT INTO t1 VALUES('three','III',3); 87 INSERT INTO t1 VALUES('four','IV',4); 88 INSERT INTO t1 VALUES('five','V',5); 89 } 90 execsql {SELECT * FROM t1} 91} {one I 1 two II 2 three III 3 four IV 4 five V 5} 92 93# Use the index 94do_test $t.4 { 95 execsql { 96 SELECT * FROM t1 WHERE a = 'one'; 97 } 98} {one I 1} 99do_test $t.5 { 100 execsql { 101 SELECT * FROM t1 WHERE a = 'four'; 102 } 103} {four IV 4} 104ifcapable subquery { 105 do_test $t.6 { 106 execsql { 107 SELECT * FROM t1 WHERE a IN ('one', 'two'); 108 } 109 } {one I 1 two II 2} 110} 111 112# Now check that we can retrieve data in both UTF-16 and UTF-8 113do_test $t.7 { 114 set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL] 115 sqlite3_step $STMT 116 sqlite3_column_text $STMT 0 117} {four} 118 119do_test $t.8 { 120 sqlite3_step $STMT 121 utf8 [sqlite3_column_text16 $STMT 0] 122} {five} 123 124do_test $t.9 { 125 sqlite3_finalize $STMT 126} SQLITE_OK 127 128ifcapable vacuum { 129 execsql VACUUM 130} 131 132do_test $t.10 { 133 db eval {PRAGMA encoding} 134} $enc 135 136} 137 138# The three unicode encodings understood by SQLite. 139set encodings [list UTF-8 UTF-16le UTF-16be] 140 141set sqlite_os_trace 0 142set i 1 143foreach enc $encodings { 144 forcedelete test.db 145 sqlite3 db test.db 146 db eval "PRAGMA encoding = \"$enc\"" 147 execsql $dbcontents 148 do_test enc2-$i.0.1 { 149 db eval {PRAGMA encoding} 150 } $enc 151 do_test enc2-$i.0.2 { 152 db eval {PRAGMA encoding=UTF8} 153 db eval {PRAGMA encoding} 154 } $enc 155 do_test enc2-$i.0.3 { 156 db eval {PRAGMA encoding=UTF16le} 157 db eval {PRAGMA encoding} 158 } $enc 159 do_test enc2-$i.0.4 { 160 db eval {PRAGMA encoding=UTF16be} 161 db eval {PRAGMA encoding} 162 } $enc 163 164 db close 165 run_test_script enc2-$i $enc 166 db close 167 incr i 168} 169 170# Test that it is an error to try to attach a database with a different 171# encoding to the main database. 172ifcapable attach { 173 do_test enc2-4.1 { 174 forcedelete test.db 175 sqlite3 db test.db 176 db eval "PRAGMA encoding = 'UTF-8'" 177 db eval "CREATE TABLE abc(a, b, c);" 178 } {} 179 do_test enc2-4.2 { 180 forcedelete test2.db 181 sqlite3 db2 test2.db 182 db2 eval "PRAGMA encoding = 'UTF-16'" 183 db2 eval "CREATE TABLE abc(a, b, c);" 184 } {} 185 do_test enc2-4.3 { 186 catchsql { 187 ATTACH 'test2.db' as aux; 188 } 189 } {1 {attached databases must use the same text encoding as main database}} 190 db2 close 191 db close 192} 193 194# The following tests - enc2-5.* - test that SQLite selects the correct 195# collation sequence when more than one is available. 196 197set ::values [list one two three four five] 198set ::test_collate_enc INVALID 199proc test_collate {enc lhs rhs} { 200 set ::test_collate_enc $enc 201 set l [lsearch -exact $::values $lhs] 202 set r [lsearch -exact $::values $rhs] 203 set res [expr $l - $r] 204 # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res" 205 return $res 206} 207 208forcedelete test.db 209sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 210do_test enc2-5.0 { 211 execsql { 212 CREATE TABLE t5(a); 213 INSERT INTO t5 VALUES('one'); 214 INSERT INTO t5 VALUES('two'); 215 INSERT INTO t5 VALUES('five'); 216 INSERT INTO t5 VALUES('three'); 217 INSERT INTO t5 VALUES('four'); 218 } 219} {} 220do_test enc2-5.1 { 221 add_test_collate $DB 1 1 1 222 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}] 223 lappend res $::test_collate_enc 224} {one two three four five UTF-8} 225do_test enc2-5.2 { 226 add_test_collate $DB 0 1 0 227 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 228 lappend res $::test_collate_enc 229} {one two three four five UTF-16LE} 230do_test enc2-5.3 { 231 add_test_collate $DB 0 0 1 232 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 233 lappend res $::test_collate_enc 234} {one two three four five UTF-16BE} 235 236db close 237forcedelete test.db 238sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 239execsql {pragma encoding = 'UTF-16LE'} 240do_test enc2-5.4 { 241 execsql { 242 CREATE TABLE t5(a); 243 INSERT INTO t5 VALUES('one'); 244 INSERT INTO t5 VALUES('two'); 245 INSERT INTO t5 VALUES('five'); 246 INSERT INTO t5 VALUES('three'); 247 INSERT INTO t5 VALUES('four'); 248 } 249} {} 250do_test enc2-5.5 { 251 add_test_collate $DB 1 1 1 252 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 253 lappend res $::test_collate_enc 254} {one two three four five UTF-16LE} 255do_test enc2-5.6 { 256 add_test_collate $DB 1 0 1 257 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 258 lappend res $::test_collate_enc 259} {one two three four five UTF-16BE} 260do_test enc2-5.7 { 261 add_test_collate $DB 1 0 0 262 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 263 lappend res $::test_collate_enc 264} {one two three four five UTF-8} 265 266db close 267forcedelete test.db 268sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 269execsql {pragma encoding = 'UTF-16BE'} 270do_test enc2-5.8 { 271 execsql { 272 CREATE TABLE t5(a); 273 INSERT INTO t5 VALUES('one'); 274 INSERT INTO t5 VALUES('two'); 275 INSERT INTO t5 VALUES('five'); 276 INSERT INTO t5 VALUES('three'); 277 INSERT INTO t5 VALUES('four'); 278 } 279} {} 280do_test enc2-5.9 { 281 add_test_collate $DB 1 1 1 282 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 283 lappend res $::test_collate_enc 284} {one two three four five UTF-16BE} 285do_test enc2-5.10 { 286 add_test_collate $DB 1 1 0 287 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 288 lappend res $::test_collate_enc 289} {one two three four five UTF-16LE} 290do_test enc2-5.11 { 291 add_test_collate $DB 1 0 0 292 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}] 293 lappend res $::test_collate_enc 294} {one two three four five UTF-8} 295 296# Also test that a UTF-16 collation factory works. 297do_test enc2-5-12 { 298 add_test_collate $DB 0 0 0 299 catchsql { 300 SELECT * FROM t5 ORDER BY 1 COLLATE test_collate 301 } 302} {1 {no such collation sequence: test_collate}} 303do_test enc2-5.13 { 304 add_test_collate_needed $DB 305 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }] 306 lappend res $::test_collate_enc 307} {one two three four five UTF-16BE} 308do_test enc2-5.14 { 309 set ::sqlite_last_needed_collation 310} test_collate 311 312db close 313forcedelete test.db 314 315do_test enc2-5.15 { 316 sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db] 317 add_test_collate_needed $::DB 318 set ::sqlite_last_needed_collation 319} {} 320do_test enc2-5.16 { 321 execsql {CREATE TABLE t1(a varchar collate test_collate);} 322} {} 323do_test enc2-5.17 { 324 set ::sqlite_last_needed_collation 325} {test_collate} 326 327# The following tests - enc2-6.* - test that SQLite selects the correct 328# user function when more than one is available. 329 330proc test_function {enc arg} { 331 return "$enc $arg" 332} 333 334db close 335forcedelete test.db 336sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 337execsql {pragma encoding = 'UTF-8'} 338do_test enc2-6.0 { 339 execsql { 340 CREATE TABLE t5(a); 341 INSERT INTO t5 VALUES('one'); 342 } 343} {} 344do_test enc2-6.1 { 345 add_test_function $DB 1 1 1 346 execsql { 347 SELECT test_function('sqlite') 348 } 349} {{UTF-8 sqlite}} 350db close 351sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 352do_test enc2-6.2 { 353 add_test_function $DB 0 1 0 354 execsql { 355 SELECT test_function('sqlite') 356 } 357} {{UTF-16LE sqlite}} 358db close 359sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 360do_test enc2-6.3 { 361 add_test_function $DB 0 0 1 362 execsql { 363 SELECT test_function('sqlite') 364 } 365} {{UTF-16BE sqlite}} 366 367db close 368forcedelete test.db 369sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 370execsql {pragma encoding = 'UTF-16LE'} 371do_test enc2-6.3 { 372 execsql { 373 CREATE TABLE t5(a); 374 INSERT INTO t5 VALUES('sqlite'); 375 } 376} {} 377do_test enc2-6.4 { 378 add_test_function $DB 1 1 1 379 execsql { 380 SELECT test_function('sqlite') 381 } 382} {{UTF-16LE sqlite}} 383db close 384sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 385do_test enc2-6.5 { 386 add_test_function $DB 0 1 0 387 execsql { 388 SELECT test_function('sqlite') 389 } 390} {{UTF-16LE sqlite}} 391db close 392sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 393do_test enc2-6.6 { 394 add_test_function $DB 0 0 1 395 execsql { 396 SELECT test_function('sqlite') 397 } 398} {{UTF-16BE sqlite}} 399 400db close 401forcedelete test.db 402sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 403execsql {pragma encoding = 'UTF-16BE'} 404do_test enc2-6.7 { 405 execsql { 406 CREATE TABLE t5(a); 407 INSERT INTO t5 VALUES('sqlite'); 408 } 409} {} 410do_test enc2-6.8 { 411 add_test_function $DB 1 1 1 412 execsql { 413 SELECT test_function('sqlite') 414 } 415} {{UTF-16BE sqlite}} 416db close 417sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 418do_test enc2-6.9 { 419 add_test_function $DB 0 1 0 420 execsql { 421 SELECT test_function('sqlite') 422 } 423} {{UTF-16LE sqlite}} 424db close 425sqlite3 db test.db; set DB [sqlite3_connection_pointer db] 426do_test enc2-6.10 { 427 add_test_function $DB 0 0 1 428 execsql { 429 SELECT test_function('sqlite') 430 } 431} {{UTF-16BE sqlite}} 432 433 434db close 435forcedelete test.db 436 437# The following tests - enc2-7.* - function as follows: 438# 439# 1: Open an empty database file assuming UTF-16 encoding. 440# 2: Open the same database with a different handle assuming UTF-8. Create 441# a table using this handle. 442# 3: Read the sqlite_master table from the first handle. 443# 4: Ensure the first handle recognises the database encoding is UTF-8. 444# 445do_test enc2-7.1 { 446 sqlite3 db test.db 447 execsql { 448 PRAGMA encoding = 'UTF-16'; 449 SELECT * FROM sqlite_master; 450 } 451} {} 452do_test enc2-7.2 { 453 set enc [execsql { 454 PRAGMA encoding; 455 }] 456 string range $enc 0 end-2 ;# Chop off the "le" or "be" 457} {UTF-16} 458do_test enc2-7.3 { 459 sqlite3 db2 test.db 460 execsql { 461 PRAGMA encoding = 'UTF-8'; 462 CREATE TABLE abc(a, b, c); 463 } db2 464} {} 465do_test enc2-7.4 { 466 execsql { 467 SELECT * FROM sqlite_master; 468 } 469} "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}" 470do_test enc2-7.5 { 471 execsql { 472 PRAGMA encoding; 473 } 474} {UTF-8} 475 476db close 477db2 close 478 479proc utf16 {utf8} { 480 set utf16 [encoding convertto unicode $utf8] 481 append utf16 "\x00\x00" 482 return $utf16 483} 484ifcapable {complete} { 485 do_test enc2-8.1 { 486 sqlite3_complete16 [utf16 "SELECT * FROM t1;"] 487 } {1} 488 do_test enc2-8.2 { 489 sqlite3_complete16 [utf16 "SELECT * FROM"] 490 } {0} 491} 492 493# Test that the encoding of an empty database may still be set after the 494# (empty) schema has been initialized. 495forcedelete test.db 496do_test enc2-9.1 { 497 sqlite3 db test.db 498 execsql { 499 PRAGMA encoding = 'UTF-8'; 500 PRAGMA encoding; 501 } 502} {UTF-8} 503do_test enc2-9.2 { 504 sqlite3 db test.db 505 execsql { 506 PRAGMA encoding = 'UTF-16le'; 507 PRAGMA encoding; 508 } 509} {UTF-16le} 510do_test enc2-9.3 { 511 sqlite3 db test.db 512 execsql { 513 SELECT * FROM sqlite_master; 514 PRAGMA encoding = 'UTF-8'; 515 PRAGMA encoding; 516 } 517} {UTF-8} 518do_test enc2-9.4 { 519 sqlite3 db test.db 520 execsql { 521 PRAGMA encoding = 'UTF-16le'; 522 CREATE TABLE abc(a, b, c); 523 PRAGMA encoding; 524 } 525} {UTF-16le} 526do_test enc2-9.5 { 527 sqlite3 db test.db 528 execsql { 529 PRAGMA encoding = 'UTF-8'; 530 PRAGMA encoding; 531 } 532} {UTF-16le} 533 534# Ticket #1987. 535# Disallow encoding changes once the encoding has been set. 536# 537do_test enc2-10.1 { 538 db close 539 forcedelete test.db test.db-journal 540 sqlite3 db test.db 541 db eval { 542 PRAGMA encoding=UTF16; 543 CREATE TABLE t1(a); 544 PRAGMA encoding=UTF8; 545 CREATE TABLE t2(b); 546 } 547 db close 548 sqlite3 db test.db 549 db eval { 550 SELECT name FROM sqlite_master 551 } 552} {t1 t2} 553 554# 2020-01-15 ticket a08879a4a476eea9 555# Do not allow a database connection encoding change unless *all* 556# attached databases are empty. 557# 558reset_db 559do_execsql_test enc2-11.10 { 560 PRAGMA encoding=UTF8; 561 CREATE TEMP TABLE t1(x); 562 INSERT INTO t1 VALUES('this is a test'); 563 PRAGMA encoding=UTF16; 564 SELECT * FROM t1; 565} {{this is a test}} 566 567finish_test 568