xref: /sqlite-3.40.0/test/enc2.test (revision 7aa3ebee)
1# 2002 May 24
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11# This file implements regression tests for SQLite library.  The focus of
12# this file is testing the SQLite routines used for converting between the
13# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
14# UTF-16be).
15#
16# $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
17
18set testdir [file dirname $argv0]
19source $testdir/tester.tcl
20
21# If UTF16 support is disabled, ignore the tests in this file
22#
23ifcapable {!utf16} {
24  finish_test
25  return
26}
27
28# The rough organisation of tests in this file is:
29#
30# enc2.1.*: Simple tests with a UTF-8 db.
31# enc2.2.*: Simple tests with a UTF-16LE db.
32# enc2.3.*: Simple tests with a UTF-16BE db.
33# enc2.4.*: Test that attached databases must have the same text encoding
34#           as the main database.
35# enc2.5.*: Test the behavior of the library when a collation sequence is
36#           not available for the most desirable text encoding.
37# enc2.6.*: Similar test for user functions.
38# enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
39#           wrong text encoding for the database.
40# enc2.8.*: Test sqlite3_complete16()
41#
42
43db close
44
45# Return the UTF-8 representation of the supplied UTF-16 string $str.
46proc utf8 {str} {
47  # If $str ends in two 0x00 0x00 bytes, knock these off before
48  # converting to UTF-8 using TCL.
49  binary scan $str \c* vals
50  if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
51    set str [binary format \c* [lrange $vals 0 end-2]]
52  }
53
54  set r [encoding convertfrom unicode $str]
55  return $r
56}
57
58#
59# This proc contains all the tests in this file. It is run
60# three times. Each time the file 'test.db' contains a database
61# with the following contents:
62set dbcontents {
63  CREATE TABLE t1(a PRIMARY KEY, b, c);
64  INSERT INTO t1 VALUES('one', 'I', 1);
65}
66# This proc tests that we can open and manipulate the test.db
67# database, and that it is possible to retreive values in
68# various text encodings.
69#
70proc run_test_script {t enc} {
71
72# Open the database and pull out a (the) row.
73do_test $t.1 {
74  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
75  execsql {SELECT * FROM t1}
76} {one I 1}
77
78# Insert some data
79do_test $t.2 {
80  execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
81  execsql {SELECT * FROM t1}
82} {one I 1 two II 2}
83
84# Insert some data
85do_test $t.3 {
86  execsql {
87    INSERT INTO t1 VALUES('three','III',3);
88    INSERT INTO t1 VALUES('four','IV',4);
89    INSERT INTO t1 VALUES('five','V',5);
90  }
91  execsql {SELECT * FROM t1}
92} {one I 1 two II 2 three III 3 four IV 4 five V 5}
93
94# Use the index
95do_test $t.4 {
96  execsql {
97    SELECT * FROM t1 WHERE a = 'one';
98  }
99} {one I 1}
100do_test $t.5 {
101  execsql {
102    SELECT * FROM t1 WHERE a = 'four';
103  }
104} {four IV 4}
105ifcapable subquery {
106  do_test $t.6 {
107    execsql {
108      SELECT * FROM t1 WHERE a IN ('one', 'two');
109    }
110  } {one I 1 two II 2}
111}
112
113# Now check that we can retrieve data in both UTF-16 and UTF-8
114do_test $t.7 {
115  set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
116  sqlite3_step $STMT
117  sqlite3_column_text $STMT 0
118} {four}
119
120do_test $t.8 {
121  sqlite3_step $STMT
122  utf8 [sqlite3_column_text16 $STMT 0]
123} {five}
124
125do_test $t.9 {
126  sqlite3_finalize $STMT
127} SQLITE_OK
128
129ifcapable vacuum {
130  execsql VACUUM
131}
132
133do_test $t.10 {
134  db eval {PRAGMA encoding}
135} $enc
136
137}
138
139# The three unicode encodings understood by SQLite.
140set encodings [list UTF-8 UTF-16le UTF-16be]
141
142set sqlite_os_trace 0
143set i 1
144foreach enc $encodings {
145  forcedelete test.db
146  sqlite3 db test.db
147  db eval "PRAGMA encoding = \"$enc\""
148  execsql $dbcontents
149  do_test enc2-$i.0.1 {
150    db eval {PRAGMA encoding}
151  } $enc
152  do_test enc2-$i.0.2 {
153    db eval {PRAGMA encoding=UTF8}
154    db eval {PRAGMA encoding}
155  } $enc
156  do_test enc2-$i.0.3 {
157    db eval {PRAGMA encoding=UTF16le}
158    db eval {PRAGMA encoding}
159  } $enc
160  do_test enc2-$i.0.4 {
161    db eval {PRAGMA encoding=UTF16be}
162    db eval {PRAGMA encoding}
163  } $enc
164
165  db close
166  run_test_script enc2-$i $enc
167  db close
168  incr i
169}
170
171# Test that it is an error to try to attach a database with a different
172# encoding to the main database.
173ifcapable attach {
174  do_test enc2-4.1 {
175    forcedelete test.db
176    sqlite3 db test.db
177    db eval "PRAGMA encoding = 'UTF-8'"
178    db eval "CREATE TABLE abc(a, b, c);"
179  } {}
180  do_test enc2-4.2 {
181    forcedelete test2.db
182    sqlite3 db2 test2.db
183    db2 eval "PRAGMA encoding = 'UTF-16'"
184    db2 eval "CREATE TABLE abc(a, b, c);"
185  } {}
186  do_test enc2-4.3 {
187    catchsql {
188      ATTACH 'test2.db' as aux;
189    }
190  } {1 {attached databases must use the same text encoding as main database}}
191  db2 close
192  db close
193}
194
195# The following tests - enc2-5.* - test that SQLite selects the correct
196# collation sequence when more than one is available.
197
198set ::values [list one two three four five]
199set ::test_collate_enc INVALID
200proc test_collate {enc lhs rhs} {
201  set ::test_collate_enc $enc
202  set l [lsearch -exact $::values $lhs]
203  set r [lsearch -exact $::values $rhs]
204  set res [expr $l - $r]
205  # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
206  return $res
207}
208
209forcedelete test.db
210sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
211do_test enc2-5.0 {
212  execsql {
213    CREATE TABLE t5(a);
214    INSERT INTO t5 VALUES('one');
215    INSERT INTO t5 VALUES('two');
216    INSERT INTO t5 VALUES('five');
217    INSERT INTO t5 VALUES('three');
218    INSERT INTO t5 VALUES('four');
219  }
220} {}
221do_test enc2-5.1 {
222  add_test_collate $DB 1 1 1
223  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
224  lappend res $::test_collate_enc
225} {one two three four five UTF-8}
226do_test enc2-5.2 {
227  add_test_collate $DB 0 1 0
228  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
229  lappend res $::test_collate_enc
230} {one two three four five UTF-16LE}
231do_test enc2-5.3 {
232  add_test_collate $DB 0 0 1
233  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
234  lappend res $::test_collate_enc
235} {one two three four five UTF-16BE}
236
237db close
238forcedelete test.db
239sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
240execsql {pragma encoding = 'UTF-16LE'}
241do_test enc2-5.4 {
242  execsql {
243    CREATE TABLE t5(a);
244    INSERT INTO t5 VALUES('one');
245    INSERT INTO t5 VALUES('two');
246    INSERT INTO t5 VALUES('five');
247    INSERT INTO t5 VALUES('three');
248    INSERT INTO t5 VALUES('four');
249  }
250} {}
251do_test enc2-5.5 {
252  add_test_collate $DB 1 1 1
253  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
254  lappend res $::test_collate_enc
255} {one two three four five UTF-16LE}
256do_test enc2-5.6 {
257  add_test_collate $DB 1 0 1
258  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
259  lappend res $::test_collate_enc
260} {one two three four five UTF-16BE}
261do_test enc2-5.7 {
262  add_test_collate $DB 1 0 0
263  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
264  lappend res $::test_collate_enc
265} {one two three four five UTF-8}
266
267db close
268forcedelete test.db
269sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
270execsql {pragma encoding = 'UTF-16BE'}
271do_test enc2-5.8 {
272  execsql {
273    CREATE TABLE t5(a);
274    INSERT INTO t5 VALUES('one');
275    INSERT INTO t5 VALUES('two');
276    INSERT INTO t5 VALUES('five');
277    INSERT INTO t5 VALUES('three');
278    INSERT INTO t5 VALUES('four');
279  }
280} {}
281do_test enc2-5.9 {
282  add_test_collate $DB 1 1 1
283  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
284  lappend res $::test_collate_enc
285} {one two three four five UTF-16BE}
286do_test enc2-5.10 {
287  add_test_collate $DB 1 1 0
288  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
289  lappend res $::test_collate_enc
290} {one two three four five UTF-16LE}
291do_test enc2-5.11 {
292  add_test_collate $DB 1 0 0
293  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
294  lappend res $::test_collate_enc
295} {one two three four five UTF-8}
296
297# Also test that a UTF-16 collation factory works.
298do_test enc2-5-12 {
299  add_test_collate $DB 0 0 0
300  catchsql {
301    SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
302  }
303} {1 {no such collation sequence: test_collate}}
304do_test enc2-5.13 {
305  add_test_collate_needed $DB
306  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
307  lappend res $::test_collate_enc
308} {one two three four five UTF-16BE}
309do_test enc2-5.14 {
310  set ::sqlite_last_needed_collation
311} test_collate
312
313db close
314forcedelete test.db
315
316do_test enc2-5.15 {
317  sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
318  add_test_collate_needed $::DB
319  set ::sqlite_last_needed_collation
320} {}
321do_test enc2-5.16 {
322  execsql {CREATE TABLE t1(a varchar collate test_collate);}
323} {}
324do_test enc2-5.17 {
325  set ::sqlite_last_needed_collation
326} {test_collate}
327
328# The following tests - enc2-6.* - test that SQLite selects the correct
329# user function when more than one is available.
330
331proc test_function {enc arg} {
332  return "$enc $arg"
333}
334
335db close
336forcedelete test.db
337sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
338execsql {pragma encoding = 'UTF-8'}
339do_test enc2-6.0 {
340  execsql {
341    CREATE TABLE t5(a);
342    INSERT INTO t5 VALUES('one');
343  }
344} {}
345do_test enc2-6.1 {
346  add_test_function $DB 1 1 1
347  execsql {
348    SELECT test_function('sqlite')
349  }
350} {{UTF-8 sqlite}}
351db close
352sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
353do_test enc2-6.2 {
354  add_test_function $DB 0 1 0
355  execsql {
356    SELECT test_function('sqlite')
357  }
358} {{UTF-16LE sqlite}}
359db close
360sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
361do_test enc2-6.3 {
362  add_test_function $DB 0 0 1
363  execsql {
364    SELECT test_function('sqlite')
365  }
366} {{UTF-16BE sqlite}}
367
368db close
369forcedelete test.db
370sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
371execsql {pragma encoding = 'UTF-16LE'}
372do_test enc2-6.3 {
373  execsql {
374    CREATE TABLE t5(a);
375    INSERT INTO t5 VALUES('sqlite');
376  }
377} {}
378do_test enc2-6.4 {
379  add_test_function $DB 1 1 1
380  execsql {
381    SELECT test_function('sqlite')
382  }
383} {{UTF-16LE sqlite}}
384db close
385sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
386do_test enc2-6.5 {
387  add_test_function $DB 0 1 0
388  execsql {
389    SELECT test_function('sqlite')
390  }
391} {{UTF-16LE sqlite}}
392db close
393sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
394do_test enc2-6.6 {
395  add_test_function $DB 0 0 1
396  execsql {
397    SELECT test_function('sqlite')
398  }
399} {{UTF-16BE sqlite}}
400
401db close
402forcedelete test.db
403sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
404execsql {pragma encoding = 'UTF-16BE'}
405do_test enc2-6.7 {
406  execsql {
407    CREATE TABLE t5(a);
408    INSERT INTO t5 VALUES('sqlite');
409  }
410} {}
411do_test enc2-6.8 {
412  add_test_function $DB 1 1 1
413  execsql {
414    SELECT test_function('sqlite')
415  }
416} {{UTF-16BE sqlite}}
417db close
418sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
419do_test enc2-6.9 {
420  add_test_function $DB 0 1 0
421  execsql {
422    SELECT test_function('sqlite')
423  }
424} {{UTF-16LE sqlite}}
425db close
426sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
427do_test enc2-6.10 {
428  add_test_function $DB 0 0 1
429  execsql {
430    SELECT test_function('sqlite')
431  }
432} {{UTF-16BE sqlite}}
433
434
435db close
436forcedelete test.db
437
438# The following tests - enc2-7.* - function as follows:
439#
440# 1: Open an empty database file assuming UTF-16 encoding.
441# 2: Open the same database with a different handle assuming UTF-8. Create
442#    a table using this handle.
443# 3: Read the sqlite_master table from the first handle.
444# 4: Ensure the first handle recognises the database encoding is UTF-8.
445#
446do_test enc2-7.1 {
447  sqlite3 db test.db
448  execsql {
449    PRAGMA encoding = 'UTF-16';
450    SELECT * FROM sqlite_master;
451  }
452} {}
453do_test enc2-7.2 {
454  set enc [execsql {
455    PRAGMA encoding;
456  }]
457  string range $enc 0 end-2 ;# Chop off the "le" or "be"
458} {UTF-16}
459do_test enc2-7.3 {
460  sqlite3 db2 test.db
461  execsql {
462    PRAGMA encoding = 'UTF-8';
463    CREATE TABLE abc(a, b, c);
464  } db2
465} {}
466do_test enc2-7.4 {
467  execsql {
468    SELECT * FROM sqlite_master;
469  }
470} "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
471do_test enc2-7.5 {
472  execsql {
473    PRAGMA encoding;
474  }
475} {UTF-8}
476
477db close
478db2 close
479
480proc utf16 {utf8} {
481  set utf16 [encoding convertto unicode $utf8]
482  append utf16 "\x00\x00"
483  return $utf16
484}
485ifcapable {complete} {
486  do_test enc2-8.1 {
487    sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
488  } {1}
489  do_test enc2-8.2 {
490    sqlite3_complete16 [utf16 "SELECT * FROM"]
491  } {0}
492}
493
494# Test that the encoding of an empty database may still be set after the
495# (empty) schema has been initialized.
496forcedelete test.db
497do_test enc2-9.1 {
498  sqlite3 db test.db
499  execsql {
500    PRAGMA encoding = 'UTF-8';
501    PRAGMA encoding;
502  }
503} {UTF-8}
504do_test enc2-9.2 {
505  sqlite3 db test.db
506  execsql {
507    PRAGMA encoding = 'UTF-16le';
508    PRAGMA encoding;
509  }
510} {UTF-16le}
511do_test enc2-9.3 {
512  sqlite3 db test.db
513  execsql {
514    SELECT * FROM sqlite_master;
515    PRAGMA encoding = 'UTF-8';
516    PRAGMA encoding;
517  }
518} {UTF-8}
519do_test enc2-9.4 {
520  sqlite3 db test.db
521  execsql {
522    PRAGMA encoding = 'UTF-16le';
523    CREATE TABLE abc(a, b, c);
524    PRAGMA encoding;
525  }
526} {UTF-16le}
527do_test enc2-9.5 {
528  sqlite3 db test.db
529  execsql {
530    PRAGMA encoding = 'UTF-8';
531    PRAGMA encoding;
532  }
533} {UTF-16le}
534
535# Ticket #1987.
536# Disallow encoding changes once the encoding has been set.
537#
538do_test enc2-10.1 {
539  db close
540  forcedelete test.db test.db-journal
541  sqlite3 db test.db
542  db eval {
543    PRAGMA encoding=UTF16;
544    CREATE TABLE t1(a);
545    PRAGMA encoding=UTF8;
546    CREATE TABLE t2(b);
547  }
548  db close
549  sqlite3 db test.db
550  db eval {
551    SELECT name FROM sqlite_master
552  }
553} {t1 t2}
554
555finish_test
556