xref: /sqlite-3.40.0/test/enc2.test (revision dddca286)
1# 2002 May 24
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11# This file implements regression tests for SQLite library.  The focus of
12# this file is testing the SQLite routines used for converting between the
13# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
14# UTF-16be).
15#
16# $Id: enc2.test,v 1.25 2006/01/03 00:33:50 drh Exp $
17
18set testdir [file dirname $argv0]
19source $testdir/tester.tcl
20
21# If UTF16 support is disabled, ignore the tests in this file
22#
23ifcapable {!utf16} {
24  finish_test
25  return
26}
27
28# The rough organisation of tests in this file is:
29#
30# enc2.1.*: Simple tests with a UTF-8 db.
31# enc2.2.*: Simple tests with a UTF-16LE db.
32# enc2.3.*: Simple tests with a UTF-16BE db.
33# enc2.4.*: Test that attached databases must have the same text encoding
34#           as the main database.
35# enc2.5.*: Test the behaviour of the library when a collation sequence is
36#           not available for the most desirable text encoding.
37# enc2.6.*: Similar test for user functions.
38# enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
39#           wrong text encoding for the database.
40# enc2.8.*: Test sqlite3_complete16()
41#
42
43db close
44
45# Return the UTF-8 representation of the supplied UTF-16 string $str.
46proc utf8 {str} {
47  # If $str ends in two 0x00 0x00 bytes, knock these off before
48  # converting to UTF-8 using TCL.
49  binary scan $str \c* vals
50  if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
51    set str [binary format \c* [lrange $vals 0 end-2]]
52  }
53
54  set r [encoding convertfrom unicode $str]
55  return $r
56}
57
58#
59# This proc contains all the tests in this file. It is run
60# three times. Each time the file 'test.db' contains a database
61# with the following contents:
62set dbcontents {
63  CREATE TABLE t1(a PRIMARY KEY, b, c);
64  INSERT INTO t1 VALUES('one', 'I', 1);
65}
66# This proc tests that we can open and manipulate the test.db
67# database, and that it is possible to retreive values in
68# various text encodings.
69#
70proc run_test_script {t enc} {
71
72# Open the database and pull out a (the) row.
73do_test $t.1 {
74  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
75  execsql {SELECT * FROM t1}
76} {one I 1}
77
78# Insert some data
79do_test $t.2 {
80  execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
81  execsql {SELECT * FROM t1}
82} {one I 1 two II 2}
83
84# Insert some data
85do_test $t.3 {
86  execsql {
87    INSERT INTO t1 VALUES('three','III',3);
88    INSERT INTO t1 VALUES('four','IV',4);
89    INSERT INTO t1 VALUES('five','V',5);
90  }
91  execsql {SELECT * FROM t1}
92} {one I 1 two II 2 three III 3 four IV 4 five V 5}
93
94# Use the index
95do_test $t.4 {
96  execsql {
97    SELECT * FROM t1 WHERE a = 'one';
98  }
99} {one I 1}
100do_test $t.5 {
101  execsql {
102    SELECT * FROM t1 WHERE a = 'four';
103  }
104} {four IV 4}
105ifcapable subquery {
106  do_test $t.6 {
107    execsql {
108      SELECT * FROM t1 WHERE a IN ('one', 'two');
109    }
110  } {one I 1 two II 2}
111}
112
113# Now check that we can retrieve data in both UTF-16 and UTF-8
114do_test $t.7 {
115  set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
116  sqlite3_step $STMT
117  sqlite3_column_text $STMT 0
118} {four}
119
120do_test $t.8 {
121  sqlite3_step $STMT
122  utf8 [sqlite3_column_text16 $STMT 0]
123} {five}
124
125do_test $t.9 {
126  sqlite3_finalize $STMT
127} SQLITE_OK
128
129ifcapable vacuum {
130  execsql VACUUM
131}
132
133do_test $t.10 {
134  db eval {PRAGMA encoding}
135} $enc
136
137}
138
139# The three unicode encodings understood by SQLite.
140set encodings [list UTF-8 UTF-16le UTF-16be]
141
142set sqlite_os_trace 0
143set i 1
144foreach enc $encodings {
145  file delete -force test.db
146  sqlite3 db test.db
147  db eval "PRAGMA encoding = \"$enc\""
148  execsql $dbcontents
149  db close
150  run_test_script enc2-$i $enc
151  db close
152  incr i
153}
154
155# Test that it is an error to try to attach a database with a different
156# encoding to the main database.
157do_test enc2-4.1 {
158  file delete -force test.db
159  sqlite3 db test.db
160  db eval "PRAGMA encoding = 'UTF-8'"
161  db eval "CREATE TABLE abc(a, b, c);"
162} {}
163do_test enc2-4.2 {
164  file delete -force test2.db
165  sqlite3 db2 test2.db
166  db2 eval "PRAGMA encoding = 'UTF-16'"
167  db2 eval "CREATE TABLE abc(a, b, c);"
168} {}
169do_test enc2-4.3 {
170  catchsql {
171    ATTACH 'test2.db' as aux;
172  }
173} {1 {attached databases must use the same text encoding as main database}}
174
175db2 close
176db close
177
178# The following tests - enc2-5.* - test that SQLite selects the correct
179# collation sequence when more than one is available.
180
181set ::values [list one two three four five]
182set ::test_collate_enc INVALID
183proc test_collate {enc lhs rhs} {
184  set ::test_collate_enc $enc
185  set l [lsearch -exact $::values $lhs]
186  set r [lsearch -exact $::values $rhs]
187  set res [expr $l - $r]
188  # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
189  return $res
190}
191
192file delete -force test.db
193sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
194do_test enc2-5.0 {
195  execsql {
196    CREATE TABLE t5(a);
197    INSERT INTO t5 VALUES('one');
198    INSERT INTO t5 VALUES('two');
199    INSERT INTO t5 VALUES('five');
200    INSERT INTO t5 VALUES('three');
201    INSERT INTO t5 VALUES('four');
202  }
203} {}
204do_test enc2-5.1 {
205  add_test_collate $DB 1 1 1
206  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
207  lappend res $::test_collate_enc
208} {one two three four five UTF-8}
209do_test enc2-5.2 {
210  add_test_collate $DB 0 1 0
211  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
212  lappend res $::test_collate_enc
213} {one two three four five UTF-16LE}
214do_test enc2-5.3 {
215  add_test_collate $DB 0 0 1
216  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
217  lappend res $::test_collate_enc
218} {one two three four five UTF-16BE}
219
220db close
221file delete -force test.db
222sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
223execsql {pragma encoding = 'UTF-16LE'}
224do_test enc2-5.4 {
225  execsql {
226    CREATE TABLE t5(a);
227    INSERT INTO t5 VALUES('one');
228    INSERT INTO t5 VALUES('two');
229    INSERT INTO t5 VALUES('five');
230    INSERT INTO t5 VALUES('three');
231    INSERT INTO t5 VALUES('four');
232  }
233} {}
234do_test enc2-5.5 {
235  add_test_collate $DB 1 1 1
236  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
237  lappend res $::test_collate_enc
238} {one two three four five UTF-16LE}
239do_test enc2-5.6 {
240  add_test_collate $DB 1 0 1
241  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
242  lappend res $::test_collate_enc
243} {one two three four five UTF-16BE}
244do_test enc2-5.7 {
245  add_test_collate $DB 1 0 0
246  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
247  lappend res $::test_collate_enc
248} {one two three four five UTF-8}
249
250db close
251file delete -force test.db
252sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
253execsql {pragma encoding = 'UTF-16BE'}
254do_test enc2-5.8 {
255  execsql {
256    CREATE TABLE t5(a);
257    INSERT INTO t5 VALUES('one');
258    INSERT INTO t5 VALUES('two');
259    INSERT INTO t5 VALUES('five');
260    INSERT INTO t5 VALUES('three');
261    INSERT INTO t5 VALUES('four');
262  }
263} {}
264do_test enc2-5.9 {
265  add_test_collate $DB 1 1 1
266  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
267  lappend res $::test_collate_enc
268} {one two three four five UTF-16BE}
269do_test enc2-5.10 {
270  add_test_collate $DB 1 1 0
271  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
272  lappend res $::test_collate_enc
273} {one two three four five UTF-16LE}
274do_test enc2-5.11 {
275  add_test_collate $DB 1 0 0
276  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
277  lappend res $::test_collate_enc
278} {one two three four five UTF-8}
279
280# Also test that a UTF-16 collation factory works.
281do_test enc2-5-12 {
282  add_test_collate $DB 0 0 0
283  catchsql {
284    SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
285  }
286} {1 {no such collation sequence: test_collate}}
287do_test enc2-5.13 {
288  add_test_collate_needed $DB
289  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
290  lappend res $::test_collate_enc
291} {one two three four five UTF-16BE}
292do_test enc2-5.14 {
293  set ::sqlite_last_needed_collation
294} test_collate
295
296db close
297file delete -force test.db
298
299do_test enc2-5.15 {
300  sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
301  add_test_collate_needed $::DB
302  set ::sqlite_last_needed_collation
303} {}
304do_test enc2-5.16 {
305  execsql {CREATE TABLE t1(a varchar collate test_collate);}
306} {}
307do_test enc2-5.17 {
308  set ::sqlite_last_needed_collation
309} {test_collate}
310
311# The following tests - enc2-6.* - test that SQLite selects the correct
312# user function when more than one is available.
313
314proc test_function {enc arg} {
315  return "$enc $arg"
316}
317
318file delete -force test.db
319sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
320execsql {pragma encoding = 'UTF-8'}
321do_test enc2-6.0 {
322  execsql {
323    CREATE TABLE t5(a);
324    INSERT INTO t5 VALUES('one');
325  }
326} {}
327do_test enc2-6.1 {
328  add_test_function $DB 1 1 1
329  execsql {
330    SELECT test_function('sqlite')
331  }
332} {{UTF-8 sqlite}}
333db close
334sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
335do_test enc2-6.2 {
336  add_test_function $DB 0 1 0
337  execsql {
338    SELECT test_function('sqlite')
339  }
340} {{UTF-16LE sqlite}}
341db close
342sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
343do_test enc2-6.3 {
344  add_test_function $DB 0 0 1
345  execsql {
346    SELECT test_function('sqlite')
347  }
348} {{UTF-16BE sqlite}}
349
350db close
351file delete -force test.db
352sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
353execsql {pragma encoding = 'UTF-16LE'}
354do_test enc2-6.3 {
355  execsql {
356    CREATE TABLE t5(a);
357    INSERT INTO t5 VALUES('sqlite');
358  }
359} {}
360do_test enc2-6.4 {
361  add_test_function $DB 1 1 1
362  execsql {
363    SELECT test_function('sqlite')
364  }
365} {{UTF-16LE sqlite}}
366db close
367sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
368do_test enc2-6.5 {
369  add_test_function $DB 0 1 0
370  execsql {
371    SELECT test_function('sqlite')
372  }
373} {{UTF-16LE sqlite}}
374db close
375sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
376do_test enc2-6.6 {
377  add_test_function $DB 0 0 1
378  execsql {
379    SELECT test_function('sqlite')
380  }
381} {{UTF-16BE sqlite}}
382
383db close
384file delete -force test.db
385sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
386execsql {pragma encoding = 'UTF-16BE'}
387do_test enc2-6.7 {
388  execsql {
389    CREATE TABLE t5(a);
390    INSERT INTO t5 VALUES('sqlite');
391  }
392} {}
393do_test enc2-6.8 {
394  add_test_function $DB 1 1 1
395  execsql {
396    SELECT test_function('sqlite')
397  }
398} {{UTF-16BE sqlite}}
399db close
400sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
401do_test enc2-6.9 {
402  add_test_function $DB 0 1 0
403  execsql {
404    SELECT test_function('sqlite')
405  }
406} {{UTF-16LE sqlite}}
407db close
408sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
409do_test enc2-6.10 {
410  add_test_function $DB 0 0 1
411  execsql {
412    SELECT test_function('sqlite')
413  }
414} {{UTF-16BE sqlite}}
415
416
417db close
418file delete -force test.db
419
420# The following tests - enc2-7.* - function as follows:
421#
422# 1: Open an empty database file assuming UTF-16 encoding.
423# 2: Open the same database with a different handle assuming UTF-8. Create
424#    a table using this handle.
425# 3: Read the sqlite_master table from the first handle.
426# 4: Ensure the first handle recognises the database encoding is UTF-8.
427#
428do_test enc2-7.1 {
429  sqlite3 db test.db
430  execsql {
431    PRAGMA encoding = 'UTF-16';
432    SELECT * FROM sqlite_master;
433  }
434} {}
435do_test enc2-7.2 {
436  set enc [execsql {
437    PRAGMA encoding;
438  }]
439  string range $enc 0 end-2 ;# Chop off the "le" or "be"
440} {UTF-16}
441do_test enc2-7.3 {
442  sqlite3 db2 test.db
443  execsql {
444    PRAGMA encoding = 'UTF-8';
445    CREATE TABLE abc(a, b, c);
446  } db2
447} {}
448do_test enc2-7.4 {
449  execsql {
450    SELECT * FROM sqlite_master;
451  }
452} "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
453do_test enc2-7.5 {
454  execsql {
455    PRAGMA encoding;
456  }
457} {UTF-8}
458
459db close
460db2 close
461
462proc utf16 {utf8} {
463  set utf16 [encoding convertto unicode $utf8]
464  append utf16 "\x00\x00"
465  return $utf16
466}
467ifcapable {complete} {
468  do_test enc2-8.1 {
469    sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
470  } {1}
471  do_test enc2-8.2 {
472    sqlite3_complete16 [utf16 "SELECT * FROM"]
473  } {0}
474}
475
476finish_test
477