# 2002 May 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Skip this test if the build does not support multiple encodings.
#
ifcapable {!utf16} {
  finish_test
  return
}

# Compare two binary strings byte-by-byte.
#
# Both $got and $expect are decoded into lists of signed 8-bit integers
# with [binary scan ... c*].  A test case named $testname is then run via
# do_test: its script simply returns the decoded $got list, and the
# decoded $expect list is the expected result.
#
proc do_bincmp_test {testname got expect} {
  binary scan $expect c* expectvals
  binary scan $got c* gotvals
  do_test $testname [list set dummy $gotvals] $expectvals
}

# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
#
# Returns the byte-swapped binary string.  An empty input yields an
# empty result.
#
proc swap_byte_order {utf16} {
  binary scan $utf16 c* ints

  # Start from an empty list so that an empty input does not leave
  # ints2 undefined when it is read below.
  set ints2 [list]
  foreach {a b} $ints {
    lappend ints2 $b $a
  }

  return [binary format c* $ints2]
}

#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
#   1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
#      SQLite routines produce the same results.
#
#   2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
#      SQLite routines produce the same results.
#
#   3. Use the SQLite routines to convert the native machine order UTF-16
#      representation back to the original UTF-8. Check that the result
#      matches the original representation.
#
#   4. Add a byte-order mark to each of the UTF-16 representations and
#      check that the SQLite routines can convert them back to UTF-8.  For
#      byte-order mark info, refer to section 3.10 of the unicode standard.
#
#   5. Take the byte-order marked UTF-16 strings from step 4 and ensure
#      that SQLite can convert them both to native byte order UTF-16
#      strings, sans BOM.
#
# Coverage:
#
# sqlite_utf8to16be (step 2)
# sqlite_utf8to16le (step 1)
# sqlite_utf16to8 (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {

  # Step 1.
  set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
  set utf16le_tcl [encoding convertto unicode $str]
  append utf16le_tcl "\x00\x00"
  # The TCL result is byte-swapped on hosts that are not little-endian.
  if { $::tcl_platform(byteOrder) ne "littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2.
  set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  # The TCL result is byte-swapped on little-endian hosts.
  if { $::tcl_platform(byteOrder) eq "littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
  set utf16be $utf16be_tcl

  # Step 3.
  if { $::tcl_platform(byteOrder) eq "littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]

  # Step 4 (little endian).
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]

  # Step 5 (little endian to little endian).
  set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
  do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
}

translate_selftest

test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_conversion enc-10 [string repeat "\uE000" 100]

# Collation callback used by the enc-11.* tests: orders two values with
# [string compare].  The $enc argument is not used.
#
proc test_collate {enc zLeft zRight} {
  return [string compare $zLeft $zRight]
}
# NOTE(review): the meaning of the three flag arguments is defined by the
# test harness, not by this file -- confirm before changing them.
add_test_collate $::DB 0 0 1
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}
# Both rows inserted above are expected to compare equal to U+00C8.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

#-------------------------------------------------------------------------
# enc-12.*: a UTF-8 connection whose main database file is overwritten,
# via the backup command, with the contents of a UTF-16le database.
#
reset_db
forcedelete test.db2
forcedelete test.db3

do_execsql_test enc-12.0 {
  PRAGMA encoding = 'utf-8';
  CREATE TABLE t1(a, b, c);
  INSERT INTO t1 VALUES('a', 'b', 'c');
  ATTACH 'test.db3' AS aux;
  CREATE TABLE aux.t3(x, y, z);
  INSERT INTO t3 VALUES('xxx', 'yyy', 'zzz');
  PRAGMA encoding;
} {UTF-8}

do_test enc-12.1 {
  sqlite3 db2 test.db2
  db2 eval {
    PRAGMA encoding = 'UTF-16le';
    CREATE TABLE t2(d, e, f);
    INSERT INTO t2 VALUES('d', 'e', 'f');
    PRAGMA encoding;
  }
} {UTF-16le}

do_test enc-12.2 {
  db2 backup test.db
  db2 close
} {}

do_catchsql_test enc-12.3 {
  SELECT * FROM t2;
} {1 {attached databases must use the same text encoding as main database}}

db close
sqlite3 db test.db3
do_execsql_test enc-12.4 {
  SELECT * FROM t3;
  PRAGMA encoding = 'UTF-16le';
  SELECT * FROM t3;
} {xxx yyy zzz xxx yyy zzz}

# Setting the encoding on an existing UTF-8 database is expected to leave
# the reported encoding unchanged.
db close
sqlite3 db test.db3
do_execsql_test enc-12.5 {
  PRAGMA encoding = 'UTF-16le';
  PRAGMA encoding;
} {UTF-8}

# Same scenario as enc-12.3, but with a TEMP table already created on the
# UTF-8 connection before the main database file is overwritten.
reset_db
do_execsql_test enc-12.6 {
  PRAGMA encoding = 'UTF-8';
  CREATE TEMP TABLE t1(a, b, c);
  INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
do_test enc-12.7 {
  sqlite3 db2 test.db2
  db2 backup test.db
  db2 close
  db eval {
    SELECT * FROM t1;
  }
} {xxx yyy zzz}
do_catchsql_test enc-12.8 {
  SELECT * FROM t2;
  SELECT * FROM t1;
} {1 {attached databases must use the same text encoding as main database}}

db close
sqlite3 db test.db
do_execsql_test enc-12.9 {
  CREATE TEMP TABLE t1(a, b, c);
  INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
do_execsql_test enc-12.10 {
  SELECT * FROM t2;
  SELECT * FROM t1;
} {d e f xxx yyy zzz}

finish_test