# 2002 May 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Skip this test if the build does not support multiple encodings.
#
ifcapable {!utf16} {
  finish_test
  return
}

# Compare two binary strings byte-for-byte.
#
# $testname is the name passed on to do_test.
# $got      is the binary string produced by the code under test.
# $expect   is the binary string it should be equal to.
#
# Both strings are unpacked into lists of 8-bit integers so that the
# comparison (and any failure report) is done on byte values rather than
# on raw binary data.
#
proc do_bincmp_test {testname got expect} {
  binary scan $expect c* expectvals
  binary scan $got c* gotvals
  do_test $testname [list set dummy $gotvals] $expectvals
}

# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
#
# The input is expected to contain an even number of bytes. An empty
# input yields an empty result.
#
proc swap_byte_order {utf16} {
  binary scan $utf16 c* ints

  # Start from an empty list so that an empty input produces an empty
  # result instead of failing on an unset variable below.
  set swapped [list]
  foreach {a b} $ints {
    lappend swapped $b $a
  }

  return [binary format c* $swapped]
}

#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
#    SQLite routines produce the same results.
#
# 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
#    SQLite routines produce the same results.
#
# 3. Use the SQLite routines to convert the native machine order UTF-16
#    representation back to the original UTF-8. Check that the result
#    matches the original representation.
#
# 4. Add a byte-order mark to each of the UTF-16 representations and
#    check that the SQLite routines can convert them back to UTF-8. For
#    byte-order mark info, refer to section 3.10 of the unicode standard.
#
# 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
#    that SQLite can convert them both to native byte order UTF-16
#    strings, sans BOM.
#
# Coverage:
#
# sqlite_utf8to16be  (step 2)
# sqlite_utf8to16le  (step 1)
# sqlite_utf16to8    (steps 3, 4)
# sqlite_utf16to16le (step 5)
# sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {

  # Step 1.
  # The TCL "unicode" encoding produces native byte order output, so the
  # bytes are swapped when the host is not little-endian. Two zero bytes
  # are appended to the TCL result before it is compared with the SQLite
  # output.
  set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
  set utf16le_tcl [encoding convertto unicode $str]
  append utf16le_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)!="littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2.
  # Same as step 1, but the swap is applied on little-endian hosts so that
  # the reference value is big-endian.
  set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
  set utf16be $utf16be_tcl

  # Step 3.
  # Pick whichever reference string is in the byte order of this machine.
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
  do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]

  # Step 4 (little endian).
  # NOTE(review): this call passes an extra trailing argument (1) to
  # test_translate that the other calls do not; its meaning is defined by
  # test_translate, which is not part of this file.
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
  do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
  do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]

  # Step 5 (little endian to little endian).
  # Unlike the other step 5 cases, the source encoding is named explicitly
  # as UTF16LE here rather than UTF16.
  set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
  do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
  do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
  do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
}

translate_selftest

# Run the conversion checks over ASCII text, the empty string, single
# non-ASCII code points and long repeated strings of code points around
# U+007F/U+0080 and U+07FF/U+0800, plus U+E000.
test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
test_conversion enc-X "\u0100"
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
test_conversion enc-10 [string repeat "\uE000" 100]

# Collation callback: orders two values with [string compare]. The $enc
# argument is accepted but not used.
proc test_collate {enc zLeft zRight} {
  return [string compare $zLeft $zRight]
}
# NOTE(review): presumably registers test_collate for only one of three
# text encodings (flags 0 0 1) -- confirm against add_test_collate.
add_test_collate $::DB 0 0 1

# Insert the two-byte UTF-8 encoding of U+00C8 (X'C388') and an over-long
# byte sequence starting with 0xC0 and ending in 0x83 0x88, both cast to
# TEXT, into a column that uses the test collation.
do_test enc-11.1 {
  execsql {
    CREATE TABLE ab(a COLLATE test_collate, b);
    INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
    INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
    CREATE INDEX ab_i ON ab(a, b);
  }
} {}
# The expected count of 2 means both rows above must compare equal to
# U+00C8 under the test collation.
do_test enc-11.2 {
  set cp200 "\u00C8"
  execsql {
    SELECT count(*) FROM ab WHERE a = $::cp200;
  }
} {2}

#-------------------------------------------------------------------------
# enc-12.*: behaviour of "PRAGMA encoding" and of databases whose text
# encodings do not match.
#
reset_db
forcedelete test.db2
forcedelete test.db3

# Create a UTF-8 main database with a second database attached as "aux".
do_execsql_test enc-12.0 {
  PRAGMA encoding = 'utf-8';
  CREATE TABLE t1(a, b, c);
  INSERT INTO t1 VALUES('a', 'b', 'c');
  ATTACH 'test.db3' AS aux;
  CREATE TABLE aux.t3(x, y, z);
  INSERT INTO t3 VALUES('xxx', 'yyy', 'zzz');
  PRAGMA encoding;
} {UTF-8}

# Create a separate UTF-16le database in test.db2.
do_test enc-12.1 {
  sqlite3 db2 test.db2
  db2 eval {
    PRAGMA encoding = 'UTF-16le';
    CREATE TABLE t2(d, e, f);
    INSERT INTO t2 VALUES('d', 'e', 'f');
    PRAGMA encoding;
  }
} {UTF-16le}

# Back up the UTF-16le database over test.db while connection [db] still
# has it open together with the attached database.
do_test enc-12.2 {
  db2 backup test.db
  db2 close
} {}

# Connection [db] must now report the encoding mismatch.
do_catchsql_test enc-12.3 {
  SELECT * FROM t2;
} {1 {attached databases must use the same text encoding as main database}}

# Setting the encoding pragma on the existing database test.db3 must not
# change how its content is read back.
db close
sqlite3 db test.db3
do_execsql_test enc-12.4 {
  SELECT * FROM t3;
  PRAGMA encoding = 'UTF-16le';
  SELECT * FROM t3;
} {xxx yyy zzz xxx yyy zzz}

db close
sqlite3 db test.db3
# NOTE(review): looks like a leftover debugging hook -- confirm whether it
# can be removed.
breakpoint
# The encoding is still reported as UTF-8 after an attempt to set it to
# UTF-16le on a freshly opened connection to test.db3.
do_execsql_test enc-12.5 {
  PRAGMA encoding = 'UTF-16le';
  PRAGMA encoding;
} {UTF-8}

# Repeat the backup scenario with a TEMP table in place of an attached
# database.
reset_db
do_execsql_test enc-12.6 {
  PRAGMA encoding = 'UTF-8';
  CREATE TEMP TABLE t1(a, b, c);
  INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
# The TEMP table is still readable on its own after the backup.
do_test enc-12.7 {
  sqlite3 db2 test.db2
  db2 backup test.db
  db2 close
  db eval {
    SELECT * FROM t1;
  }
} {xxx yyy zzz}
# Reading the backed-up table t2 through the same connection fails with
# the encoding mismatch error.
do_catchsql_test enc-12.8 {
  SELECT * FROM t2;
  SELECT * FROM t1;
} {1 {attached databases must use the same text encoding as main database}}

# A new connection to test.db can use a TEMP table alongside the
# backed-up table t2.
db close
sqlite3 db test.db
do_execsql_test enc-12.9 {
  CREATE TEMP TABLE t1(a, b, c);
  INSERT INTO t1 VALUES('xxx', 'yyy', 'zzz');
}
do_execsql_test enc-12.10 {
  SELECT * FROM t2;
  SELECT * FROM t1;
} {d e f xxx yyy zzz}

finish_test