xref: /sqlite-3.40.0/test/enc.test (revision ef5ecb41)
1# 2002 May 24
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11# This file implements regression tests for SQLite library.  The focus of
12# this file is testing the SQLite routines used for converting between the
13# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
14# UTF-16be).
15#
16# $Id: enc.test,v 1.1 2004/05/22 08:16:11 danielk1977 Exp $
17
18set testdir [file dirname $argv0]
19source $testdir/tester.tcl
20
proc do_bincmp_test {testname got expect} {
  # Register a test named $testname that compares two binary strings.
  #
  # Both $got and $expect are decoded with [binary scan ... c*] into
  # lists of 8-bit integer values; do_test is then handed a script that
  # yields the decoded $got list together with the decoded $expect list
  # as the expected result.
  binary scan $got c* gotBytes
  binary scan $expect c* expectBytes
  do_test $testname [list set dummy $gotBytes] $expectBytes
}
26
27# $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
28# to change the byte-order of the string.
proc swap_byte_order {utf16} {
  # Return a copy of the binary string $utf16 in which the two bytes of
  # every consecutive byte pair have been exchanged.  Applied to UTF-16
  # text this flips the byte order (little-endian <-> big-endian).
  #
  # An empty input produces an empty result.  The input is expected to
  # hold an even number of bytes; an unpaired trailing byte is not
  # supported.
  binary scan $utf16 c* bytes

  # Start from an empty list: when $utf16 is empty the loop body never
  # runs, and the result list must still exist for [binary format].
  set swapped {}
  foreach {first second} $bytes {
    lappend swapped $second $first
  }

  return [binary format c* $swapped]
}
39
40#
41# Test that the SQLite routines for converting between UTF encodings
42# produce the same results as their TCL counterparts.
43#
44# $testname is the prefix to be used for the test names.
45# $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
46#
47# The test procedure is:
48# 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
49#    SQLite routines produce the same results.
50#
51# 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
52#    SQLite routines produce the same results.
53#
54# 3. Use the SQLite routines to convert the native machine order UTF-16
55#    representation back to the original UTF-8. Check that the result
56#    matches the original representation.
57#
58# 4. Add a byte-order mark to each of the UTF-16 representations and
59#    check that the SQLite routines can convert them back to UTF-8.  For
60#    byte-order mark info, refer to section 3.10 of the unicode standard.
61#
62# 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
63#    that SQLite can convert them both to native byte order UTF-16
64#    strings, sans BOM.
65#
66# Coverage:
67#
68# sqlite_utf8to16be (step 2)
69# sqlite_utf8to16le (step 1)
70# sqlite_utf16to8 (steps 3, 4)
71# sqlite_utf16to16le (step 5)
72# sqlite_utf16to16be (step 5)
73#
proc test_conversion {testname str} {
  # Cross-check the SQLite UTF conversion commands against TCL's own
  # encoding support for the string $str.  Each comparison is registered
  # through do_bincmp_test under the name $testname.<step>.

  set hostIsLE [expr {$::tcl_platform(byteOrder) eq "littleEndian"}]

  # Step 1: UTF-8 -> UTF-16le.  The reference value is the output of
  # TCL's "unicode" encoding plus a two-byte zero terminator, byte-swapped
  # when the host is not little-endian.
  set le_sqlite [sqlite_utf8to16le $str]
  set le_ref [encoding convertto unicode $str]
  append le_ref "\x00\x00"
  if {!$hostIsLE} {
    set le_ref [swap_byte_order $le_ref]
  }
  do_bincmp_test $testname.1 $le_sqlite $le_ref

  # Step 2: UTF-8 -> UTF-16be.  Same construction as step 1, but the
  # reference is byte-swapped when the host IS little-endian.
  set be_sqlite [sqlite_utf8to16be $str]
  set be_ref [encoding convertto unicode $str]
  append be_ref "\x00\x00"
  if {$hostIsLE} {
    set be_ref [swap_byte_order $be_ref]
  }
  do_bincmp_test $testname.2 $be_sqlite $be_ref

  # Step 3: host-order UTF-16 -> UTF-8 must reproduce the original string.
  if {$hostIsLE} {
    set native $le_ref
  } else {
    set native $be_ref
  }
  do_bincmp_test $testname.3 [sqlite_utf16to8 $native] [binarize $str]

  # Step 4: UTF-16 prefixed with a byte-order mark -> UTF-8, once for
  # each byte order.
  set le_bom "\xFF\xFE"
  append le_bom $le_ref
  do_bincmp_test $testname.4.le [sqlite_utf16to8 $le_bom] [binarize $str]

  set be_bom "\xFE\xFF"
  append be_bom $be_ref
  do_bincmp_test $testname.4.be [sqlite_utf16to8 $be_bom] [binarize $str]

  # Step 5: BOM-prefixed UTF-16 -> UTF-16 of an explicit byte order.
  # The expected values are the step 1/2 references, which carry no BOM.
  # Each row is: test-name suffix, conversion command, input, expected.
  foreach {suffix converter input expected} [list \
    le.le sqlite_utf16to16le $le_bom $le_ref \
    be.be sqlite_utf16to16be $be_bom $be_ref \
    be.le sqlite_utf16to16le $be_bom $le_ref \
    le.be sqlite_utf16to16be $le_bom $be_ref \
  ] {
    do_bincmp_test $testname.5.$suffix [$converter $input] $expected
  }
}
131
132
# Run the conversion checks over a table of sample strings.  Each row is
# a test-name prefix followed by the string to convert; the rows are
# processed in the order listed.
foreach {encCase encText} [list \
  enc-1 "hello world" \
  enc-2 "sqlite" \
  enc-3 "" \
  enc-4 "\u1234" \
  enc-5 "\u4321abc" \
  enc-6 "\u4321\u1234" \
  enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100] \
  enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100] \
  enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100] \
] {
  test_conversion $encCase $encText
}

finish_test
144
145
146
147
148