# 2002 May 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc.test,v 1.1 2004/05/22 08:16:11 danielk1977 Exp $

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Compare two binary strings byte-for-byte.
#
# $testname is the name passed on to do_test.
# $got      is the binary string produced by the code under test.
# $expect   is the binary string it should be equal to.
#
# Both strings are expanded into lists of 8-bit integers (binary scan
# format "c*") so that do_test compares, and on failure reports, plain
# numbers rather than raw bytes.
proc do_bincmp_test {testname got expect} {
  binary scan $expect \c* expectvals
  binary scan $got \c* gotvals
  do_test $testname [list set dummy $gotvals] $expectvals
}

# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
# to change the byte-order of the string.
#
# Returns the byte-swapped binary string. An empty input yields an empty
# result. The input is expected to contain an even number of bytes.
proc swap_byte_order {utf16} {
  binary scan $utf16 \c* ints

  # Start from an empty list so that an empty input does not leave ints2
  # unset when it is read by the "binary format" call below.
  set ints2 [list]
  foreach {a b} $ints {
    lappend ints2 $b
    lappend ints2 $a
  }

  return [binary format \c* $ints2]
}

#
# Test that the SQLite routines for converting between UTF encodings
# produce the same results as their TCL counterparts.
#
# $testname is the prefix to be used for the test names.
# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
#
# The test procedure is:
#   1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
#      SQLite routines produce the same results.
#
#   2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
#      SQLite routines produce the same results.
#
#   3. Use the SQLite routines to convert the native machine order UTF-16
#      representation back to the original UTF-8. Check that the result
#      matches the original representation.
#
#   4. Add a byte-order mark to each of the UTF-16 representations and
#      check that the SQLite routines can convert them back to UTF-8. For
#      byte-order mark info, refer to section 3.10 of the unicode standard.
#
#   5. Take the byte-order marked UTF-16 strings from step 4 and ensure
#      that SQLite can convert them both to native byte order UTF-16
#      strings, sans BOM.
#
# Coverage:
#
#   sqlite_utf8to16be (step 2)
#   sqlite_utf8to16le (step 1)
#   sqlite_utf16to8 (steps 3, 4)
#   sqlite_utf16to16le (step 5)
#   sqlite_utf16to16be (step 5)
#
proc test_conversion {testname str} {

  # Step 1.
  # The TCL reference value gets a two-byte zero terminator appended and is
  # byte-swapped when the host is not little-endian.
  set utf16le_sqlite [sqlite_utf8to16le $str]
  set utf16le_tcl [encoding convertto unicode $str]
  append utf16le_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)!="littleEndian" } {
    set utf16le_tcl [swap_byte_order $utf16le_tcl]
  }
  do_bincmp_test $testname.1 $utf16le_sqlite $utf16le_tcl
  set utf16le $utf16le_tcl

  # Step 2.
  # Same as step 1, but the reference value is byte-swapped when the host
  # is little-endian.
  set utf16be_sqlite [sqlite_utf8to16be $str]
  set utf16be_tcl [encoding convertto unicode $str]
  append utf16be_tcl "\x00\x00"
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16be_tcl [swap_byte_order $utf16be_tcl]
  }
  do_bincmp_test $testname.2 $utf16be_sqlite $utf16be_tcl
  set utf16be $utf16be_tcl

  # Step 3.
  # Pick whichever reference string matches the host byte order.
  if { $::tcl_platform(byteOrder)=="littleEndian" } {
    set utf16 $utf16le
  } else {
    set utf16 $utf16be
  }
  set utf8_sqlite [sqlite_utf16to8 $utf16]
  do_bincmp_test $testname.3 $utf8_sqlite [binarize $str]

  # Step 4 (little endian).
  # utf16le_bom does not exist yet, so append creates it: BOM, then text.
  append utf16le_bom "\xFF\xFE" $utf16le
  set utf8_sqlite [sqlite_utf16to8 $utf16le_bom]
  do_bincmp_test $testname.4.le $utf8_sqlite [binarize $str]

  # Step 4 (big endian).
  append utf16be_bom "\xFE\xFF" $utf16be
  set utf8_sqlite [sqlite_utf16to8 $utf16be_bom]
  do_bincmp_test $testname.4.be $utf8_sqlite [binarize $str]

  # Step 5 (little endian to little endian).
  set utf16_sqlite [sqlite_utf16to16le $utf16le_bom]
  do_bincmp_test $testname.5.le.le $utf16_sqlite $utf16le

  # Step 5 (big endian to big endian).
  set utf16_sqlite [sqlite_utf16to16be $utf16be_bom]
  do_bincmp_test $testname.5.be.be $utf16_sqlite $utf16be

  # Step 5 (big endian to little endian).
  set utf16_sqlite [sqlite_utf16to16le $utf16be_bom]
  do_bincmp_test $testname.5.be.le $utf16_sqlite $utf16le

  # Step 5 (little endian to big endian).
  set utf16_sqlite [sqlite_utf16to16be $utf16le_bom]
  do_bincmp_test $testname.5.le.be $utf16_sqlite $utf16be
}


test_conversion enc-1 "hello world"
test_conversion enc-2 "sqlite"
test_conversion enc-3 ""
test_conversion enc-4 "\u1234"
test_conversion enc-5 "\u4321abc"
test_conversion enc-6 "\u4321\u1234"
test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]

finish_test