# xref: /sqlite-3.40.0/test/enc2.test (revision ef5ecb41)
# 2002 May 24
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.  The focus of
# this file is testing the SQLite routines used for converting between the
# various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
# UTF-16be).
#
# $Id: enc2.test,v 1.8 2004/06/10 14:01:08 danielk1977 Exp $
17
# Load the shared test harness that lives in the same directory as this script.
set testdir [file dirname $argv0]
source [file join $testdir tester.tcl]

# No connection is opened in this file before this point, so "db" here is
# presumably the handle created by tester.tcl (confirm against the harness).
# Every section below opens its own connection.
db close
22
# utf8 STR
#
# STR is a UTF-16 byte string. Returns the text it holds, decoded with TCL's
# "unicode" encoding.
proc utf8 {str} {
  # Look at the raw bytes: when the input finishes with a 0x00 0x00 pair,
  # drop that pair so it is not handed to the TCL converter.
  binary scan $str c* bytes
  set last [lindex $bytes end]
  set prev [lindex $bytes end-1]
  if {$last==0 && $prev==0} {
    set str [binary format c* [lrange $bytes 0 end-2]]
  }

  return [encoding convertfrom unicode $str]
}
35
# Initial contents of test.db. The loop over the encodings further down loads
# this SQL into a freshly created database before each call to
# run_test_script, so every run starts from one table holding one row.
set dbcontents {
  CREATE TABLE t1(a PRIMARY KEY, b, c);
  INSERT INTO t1 VALUES('one', 'I', 1);
}

# run_test_script T ENC
#
# Holds the test sequence shared by all encodings; it is run once per
# encoding. T is the test-name prefix (tests are named T.1 through T.10) and
# ENC is the value "PRAGMA encoding" is expected to report for test.db.
#
# The sequence checks that test.db can be opened and modified, and that text
# can be retrieved through both the UTF-8 and the UTF-16 column accessors.
# The connection "db" opened by the first test is still open when the proc
# returns.
proc run_test_script {t enc} {

# Open test.db and read back the single pre-loaded row.
do_test ${t}.1 {
  set DB [sqlite db test.db]
  execsql {SELECT * FROM t1}
} {one I 1}

# Add a second row.
do_test ${t}.2 {
  execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
  execsql {SELECT * FROM t1}
} {one I 1 two II 2}

# Add three more rows in one batch.
do_test ${t}.3 {
  execsql {
    INSERT INTO t1 VALUES('three','III',3);
    INSERT INTO t1 VALUES('four','IV',4);
    INSERT INTO t1 VALUES('five','V',5);
  }
  execsql {SELECT * FROM t1}
} {one I 1 two II 2 three III 3 four IV 4 five V 5}

# Look rows up by the PRIMARY KEY column.
do_test ${t}.4 {
  execsql {
    SELECT * FROM t1 WHERE a = 'one';
  }
} {one I 1}
do_test ${t}.5 {
  execsql {
    SELECT * FROM t1 WHERE a = 'four';
  }
} {four IV 4}
do_test ${t}.6 {
  execsql {
    SELECT * FROM t1 WHERE a IN ('one', 'two');
  }
} {one I 1 two II 2}

# Step one prepared statement twice: read the first row as UTF-8 text ...
do_test ${t}.7 {
  set STMT [sqlite3_prepare $DB {SELECT a FROM t1 WHERE c>3;} -1 TAIL]
  sqlite3_step $STMT
  sqlite3_column_text $STMT 0
} {four}

# ... and the second row as UTF-16, converted back with the utf8 helper.
do_test ${t}.8 {
  sqlite3_step $STMT
  utf8 [sqlite3_column_text16 $STMT 0]
} {five}

do_test ${t}.9 {
  sqlite3_finalize $STMT
} SQLITE_OK

# The database must report the encoding it was created with.
do_test ${t}.10 {
  db eval {PRAGMA encoding}
} $enc

}
110
# The three unicode encodings understood by SQLite.
set encodings [list UTF-8 UTF-16le UTF-16be]

# For each encoding: build a fresh test.db in that encoding, load $dbcontents
# into it, close it, then run the shared test sequence as enc2-1, enc2-2 and
# enc2-3.
set i 1
foreach enc $encodings {
  file delete -force test.db
  sqlite db test.db
  db eval "PRAGMA encoding = \"$enc\""
  execsql $dbcontents
  db close
  run_test_script enc2-$i $enc
  # run_test_script reopens test.db as "db" and leaves it open. Close it here
  # so that the next "file delete" (in this loop or in enc2-4.1) never removes
  # a database file that still has a live connection, which is not portable.
  db close
  incr i
}
124
# enc2-4.*: attaching a database whose text encoding differs from that of the
# main database must be rejected with an error.

# Main database: UTF-8.
do_test enc2-4.1 {
  file delete -force test.db
  sqlite db test.db
  db eval {PRAGMA encoding = 'UTF-8'}
  db eval {CREATE TABLE abc(a, b, c);}
} {}

# Second database, on its own connection: UTF-16.
do_test enc2-4.2 {
  file delete -force test2.db
  sqlite db2 test2.db
  db2 eval {PRAGMA encoding = 'UTF-16'}
  db2 eval {CREATE TABLE abc(a, b, c);}
} {}

# The ATTACH on the main connection has to fail.
do_test enc2-4.3 {
  catchsql {
    ATTACH 'test2.db' as aux;
  }
} {1 {attached databases must use the same text encoding as main database}}

# Release both connections before the next section recreates test.db.
db2 close
db close
147
# The following tests - enc2-5.* - test that SQLite selects the correct
# collation sequence when more than one is available.

# ::values is the ordering imposed by the test collation.
# ::test_collate_enc records the ENC argument of the most recent
# test_collate call; it reads INVALID until the first call.
set ::values [list one two three four five]
set ::test_collate_enc INVALID

# test_collate ENC LHS RHS
#
# Comparison callback for the "test_collate" collation used by the enc2-5.*
# tests (presumably wired up by the harness command add_test_collate; confirm
# against the test fixture).
#
# Stores ENC in ::test_collate_enc so a test can see which variant was
# called, then orders LHS and RHS by their positions in ::values. Returns a
# negative, zero or positive integer. A string that is not in ::values gets
# position -1 and therefore sorts first.
proc test_collate {enc lhs rhs} {
  set ::test_collate_enc $enc
  set l [lsearch -exact $::values $lhs]
  set r [lsearch -exact $::values $rhs]
  # Braced so the expression is substituted only once and can be compiled.
  set res [expr {$l - $r}]
  # puts "test_collate $enc $lhs $rhs -> $res"
  return $res
}
161
# enc2-5.0 to 5.3: a database created without an explicit PRAGMA encoding.
#
# Each test calls add_test_collate with three flags and then sorts t5 with
# the test_collate collation. The last element of the expected result is the
# ENC value the collation callback received, i.e. which variant SQLite chose.
# Judging by the expected results, the flags presumably enable the UTF-8,
# UTF-16LE and UTF-16BE variants in that order (confirm against the harness).
file delete -force test.db
set DB [sqlite db test.db]
do_test enc2-5.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.1 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
do_test enc2-5.2 {
  add_test_collate $DB 0 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.3 {
  add_test_collate $DB 0 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
190
# enc2-5.4 to 5.7: the same collation-selection checks against a database
# created as UTF-16LE.
#
# Close the connection left over from the previous section first, so that
# test.db is not deleted while it is still open.
db close
file delete -force test.db
set DB [sqlite db test.db]
execsql {pragma encoding = 'UTF-16LE'}
do_test enc2-5.4 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.5 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.6 {
  add_test_collate $DB 1 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.7 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
220
# enc2-5.8 to 5.11: the same collation-selection checks against a database
# created as UTF-16BE.
#
# Close the connection left over from the previous section first, so that
# test.db is not deleted while it is still open.
db close
file delete -force test.db
set DB [sqlite db test.db]
execsql {pragma encoding = 'UTF-16BE'}
do_test enc2-5.8 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}
do_test enc2-5.9 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.10 {
  add_test_collate $DB 1 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.11 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}

# Hand control back to the harness to report results.
finish_test
252
253
254
255