xref: /sqlite-3.40.0/test/enc2.test (revision b5f0e405)
1# 2002 May 24
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11# This file implements regression tests for SQLite library.  The focus of
12# this file is testing the SQLite routines used for converting between the
13# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
14# UTF-16be).
15#
16
17set testdir [file dirname $argv0]
18source $testdir/tester.tcl
19
20# If UTF16 support is disabled, ignore the tests in this file
21#
22ifcapable {!utf16} {
23  finish_test
24  return
25}
26
27# The rough organisation of tests in this file is:
28#
29# enc2.1.*: Simple tests with a UTF-8 db.
30# enc2.2.*: Simple tests with a UTF-16LE db.
31# enc2.3.*: Simple tests with a UTF-16BE db.
32# enc2.4.*: Test that attached databases must have the same text encoding
33#           as the main database.
34# enc2.5.*: Test the behavior of the library when a collation sequence is
35#           not available for the most desirable text encoding.
36# enc2.6.*: Similar test for user functions.
37# enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
38#           wrong text encoding for the database.
39# enc2.8.*: Test sqlite3_complete16()
40#
41
42db close
43
44# Return the UTF-8 representation of the supplied UTF-16 string $str.
45proc utf8 {str} {
46  # If $str ends in two 0x00 0x00 bytes, knock these off before
47  # converting to UTF-8 using TCL.
48  binary scan $str \c* vals
49  if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
50    set str [binary format \c* [lrange $vals 0 end-2]]
51  }
52
53  set r [encoding convertfrom unicode $str]
54  return $r
55}
56
57#
58# This proc contains all the tests in this file. It is run
59# three times. Each time the file 'test.db' contains a database
60# with the following contents:
61set dbcontents {
62  CREATE TABLE t1(a PRIMARY KEY, b, c);
63  INSERT INTO t1 VALUES('one', 'I', 1);
64}
65# This proc tests that we can open and manipulate the test.db
66# database, and that it is possible to retreive values in
67# various text encodings.
68#
69proc run_test_script {t enc} {
70
71# Open the database and pull out a (the) row.
72do_test $t.1 {
73  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
74  execsql {SELECT * FROM t1}
75} {one I 1}
76
77# Insert some data
78do_test $t.2 {
79  execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
80  execsql {SELECT * FROM t1}
81} {one I 1 two II 2}
82
83# Insert some data
84do_test $t.3 {
85  execsql {
86    INSERT INTO t1 VALUES('three','III',3);
87    INSERT INTO t1 VALUES('four','IV',4);
88    INSERT INTO t1 VALUES('five','V',5);
89  }
90  execsql {SELECT * FROM t1}
91} {one I 1 two II 2 three III 3 four IV 4 five V 5}
92
93# Use the index
94do_test $t.4 {
95  execsql {
96    SELECT * FROM t1 WHERE a = 'one';
97  }
98} {one I 1}
99do_test $t.5 {
100  execsql {
101    SELECT * FROM t1 WHERE a = 'four';
102  }
103} {four IV 4}
104ifcapable subquery {
105  do_test $t.6 {
106    execsql {
107      SELECT * FROM t1 WHERE a IN ('one', 'two');
108    }
109  } {one I 1 two II 2}
110}
111
112# Now check that we can retrieve data in both UTF-16 and UTF-8
113do_test $t.7 {
114  set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
115  sqlite3_step $STMT
116  sqlite3_column_text $STMT 0
117} {four}
118
119do_test $t.8 {
120  sqlite3_step $STMT
121  utf8 [sqlite3_column_text16 $STMT 0]
122} {five}
123
124do_test $t.9 {
125  sqlite3_finalize $STMT
126} SQLITE_OK
127
128ifcapable vacuum {
129  execsql VACUUM
130}
131
132do_test $t.10 {
133  db eval {PRAGMA encoding}
134} $enc
135
136}
137
138# The three unicode encodings understood by SQLite.
139set encodings [list UTF-8 UTF-16le UTF-16be]
140
141set sqlite_os_trace 0
142set i 1
143foreach enc $encodings {
144  forcedelete test.db
145  sqlite3 db test.db
146  db eval "PRAGMA encoding = \"$enc\""
147  execsql $dbcontents
148  do_test enc2-$i.0.1 {
149    db eval {PRAGMA encoding}
150  } $enc
151  do_test enc2-$i.0.2 {
152    db eval {PRAGMA encoding=UTF8}
153    db eval {PRAGMA encoding}
154  } $enc
155  do_test enc2-$i.0.3 {
156    db eval {PRAGMA encoding=UTF16le}
157    db eval {PRAGMA encoding}
158  } $enc
159  do_test enc2-$i.0.4 {
160    db eval {PRAGMA encoding=UTF16be}
161    db eval {PRAGMA encoding}
162  } $enc
163
164  db close
165  run_test_script enc2-$i $enc
166  db close
167  incr i
168}
169
170# Test that it is an error to try to attach a database with a different
171# encoding to the main database.
172ifcapable attach {
173  do_test enc2-4.1 {
174    forcedelete test.db
175    sqlite3 db test.db
176    db eval "PRAGMA encoding = 'UTF-8'"
177    db eval "CREATE TABLE abc(a, b, c);"
178  } {}
179  do_test enc2-4.2 {
180    forcedelete test2.db
181    sqlite3 db2 test2.db
182    db2 eval "PRAGMA encoding = 'UTF-16'"
183    db2 eval "CREATE TABLE abc(a, b, c);"
184  } {}
185  do_test enc2-4.3 {
186    catchsql {
187      ATTACH 'test2.db' as aux;
188    }
189  } {1 {attached databases must use the same text encoding as main database}}
190  db2 close
191  db close
192}
193
194# The following tests - enc2-5.* - test that SQLite selects the correct
195# collation sequence when more than one is available.
196
197set ::values [list one two three four five]
198set ::test_collate_enc INVALID
199proc test_collate {enc lhs rhs} {
200  set ::test_collate_enc $enc
201  set l [lsearch -exact $::values $lhs]
202  set r [lsearch -exact $::values $rhs]
203  set res [expr $l - $r]
204  # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
205  return $res
206}
207
208forcedelete test.db
209sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
210do_test enc2-5.0 {
211  execsql {
212    CREATE TABLE t5(a);
213    INSERT INTO t5 VALUES('one');
214    INSERT INTO t5 VALUES('two');
215    INSERT INTO t5 VALUES('five');
216    INSERT INTO t5 VALUES('three');
217    INSERT INTO t5 VALUES('four');
218  }
219} {}
220do_test enc2-5.1 {
221  add_test_collate $DB 1 1 1
222  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
223  lappend res $::test_collate_enc
224} {one two three four five UTF-8}
225do_test enc2-5.2 {
226  add_test_collate $DB 0 1 0
227  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
228  lappend res $::test_collate_enc
229} {one two three four five UTF-16LE}
230do_test enc2-5.3 {
231  add_test_collate $DB 0 0 1
232  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
233  lappend res $::test_collate_enc
234} {one two three four five UTF-16BE}
235
236db close
237forcedelete test.db
238sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
239execsql {pragma encoding = 'UTF-16LE'}
240do_test enc2-5.4 {
241  execsql {
242    CREATE TABLE t5(a);
243    INSERT INTO t5 VALUES('one');
244    INSERT INTO t5 VALUES('two');
245    INSERT INTO t5 VALUES('five');
246    INSERT INTO t5 VALUES('three');
247    INSERT INTO t5 VALUES('four');
248  }
249} {}
250do_test enc2-5.5 {
251  add_test_collate $DB 1 1 1
252  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
253  lappend res $::test_collate_enc
254} {one two three four five UTF-16LE}
255do_test enc2-5.6 {
256  add_test_collate $DB 1 0 1
257  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
258  lappend res $::test_collate_enc
259} {one two three four five UTF-16BE}
260do_test enc2-5.7 {
261  add_test_collate $DB 1 0 0
262  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
263  lappend res $::test_collate_enc
264} {one two three four five UTF-8}
265
266db close
267forcedelete test.db
268sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
269execsql {pragma encoding = 'UTF-16BE'}
270do_test enc2-5.8 {
271  execsql {
272    CREATE TABLE t5(a);
273    INSERT INTO t5 VALUES('one');
274    INSERT INTO t5 VALUES('two');
275    INSERT INTO t5 VALUES('five');
276    INSERT INTO t5 VALUES('three');
277    INSERT INTO t5 VALUES('four');
278  }
279} {}
280do_test enc2-5.9 {
281  add_test_collate $DB 1 1 1
282  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
283  lappend res $::test_collate_enc
284} {one two three four five UTF-16BE}
285do_test enc2-5.10 {
286  add_test_collate $DB 1 1 0
287  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
288  lappend res $::test_collate_enc
289} {one two three four five UTF-16LE}
290do_test enc2-5.11 {
291  add_test_collate $DB 1 0 0
292  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
293  lappend res $::test_collate_enc
294} {one two three four five UTF-8}
295
296# Also test that a UTF-16 collation factory works.
297do_test enc2-5-12 {
298  add_test_collate $DB 0 0 0
299  catchsql {
300    SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
301  }
302} {1 {no such collation sequence: test_collate}}
303do_test enc2-5.13 {
304  add_test_collate_needed $DB
305  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
306  lappend res $::test_collate_enc
307} {one two three four five UTF-16BE}
308do_test enc2-5.14 {
309  set ::sqlite_last_needed_collation
310} test_collate
311
312db close
313forcedelete test.db
314
315do_test enc2-5.15 {
316  sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
317  add_test_collate_needed $::DB
318  set ::sqlite_last_needed_collation
319} {}
320do_test enc2-5.16 {
321  execsql {CREATE TABLE t1(a varchar collate test_collate);}
322} {}
323do_test enc2-5.17 {
324  set ::sqlite_last_needed_collation
325} {test_collate}
326
327# The following tests - enc2-6.* - test that SQLite selects the correct
328# user function when more than one is available.
329
330proc test_function {enc arg} {
331  return "$enc $arg"
332}
333
334db close
335forcedelete test.db
336sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
337execsql {pragma encoding = 'UTF-8'}
338do_test enc2-6.0 {
339  execsql {
340    CREATE TABLE t5(a);
341    INSERT INTO t5 VALUES('one');
342  }
343} {}
344do_test enc2-6.1 {
345  add_test_function $DB 1 1 1
346  execsql {
347    SELECT test_function('sqlite')
348  }
349} {{UTF-8 sqlite}}
350db close
351sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
352do_test enc2-6.2 {
353  add_test_function $DB 0 1 0
354  execsql {
355    SELECT test_function('sqlite')
356  }
357} {{UTF-16LE sqlite}}
358db close
359sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
360do_test enc2-6.3 {
361  add_test_function $DB 0 0 1
362  execsql {
363    SELECT test_function('sqlite')
364  }
365} {{UTF-16BE sqlite}}
366
367db close
368forcedelete test.db
369sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
370execsql {pragma encoding = 'UTF-16LE'}
371do_test enc2-6.3 {
372  execsql {
373    CREATE TABLE t5(a);
374    INSERT INTO t5 VALUES('sqlite');
375  }
376} {}
377do_test enc2-6.4 {
378  add_test_function $DB 1 1 1
379  execsql {
380    SELECT test_function('sqlite')
381  }
382} {{UTF-16LE sqlite}}
383db close
384sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
385do_test enc2-6.5 {
386  add_test_function $DB 0 1 0
387  execsql {
388    SELECT test_function('sqlite')
389  }
390} {{UTF-16LE sqlite}}
391db close
392sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
393do_test enc2-6.6 {
394  add_test_function $DB 0 0 1
395  execsql {
396    SELECT test_function('sqlite')
397  }
398} {{UTF-16BE sqlite}}
399
400db close
401forcedelete test.db
402sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
403execsql {pragma encoding = 'UTF-16BE'}
404do_test enc2-6.7 {
405  execsql {
406    CREATE TABLE t5(a);
407    INSERT INTO t5 VALUES('sqlite');
408  }
409} {}
410do_test enc2-6.8 {
411  add_test_function $DB 1 1 1
412  execsql {
413    SELECT test_function('sqlite')
414  }
415} {{UTF-16BE sqlite}}
416db close
417sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
418do_test enc2-6.9 {
419  add_test_function $DB 0 1 0
420  execsql {
421    SELECT test_function('sqlite')
422  }
423} {{UTF-16LE sqlite}}
424db close
425sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
426do_test enc2-6.10 {
427  add_test_function $DB 0 0 1
428  execsql {
429    SELECT test_function('sqlite')
430  }
431} {{UTF-16BE sqlite}}
432
433
434db close
435forcedelete test.db
436
437# The following tests - enc2-7.* - function as follows:
438#
439# 1: Open an empty database file assuming UTF-16 encoding.
440# 2: Open the same database with a different handle assuming UTF-8. Create
441#    a table using this handle.
442# 3: Read the sqlite_master table from the first handle.
443# 4: Ensure the first handle recognises the database encoding is UTF-8.
444#
445do_test enc2-7.1 {
446  sqlite3 db test.db
447  execsql {
448    PRAGMA encoding = 'UTF-16';
449    SELECT * FROM sqlite_master;
450  }
451} {}
452do_test enc2-7.2 {
453  set enc [execsql {
454    PRAGMA encoding;
455  }]
456  string range $enc 0 end-2 ;# Chop off the "le" or "be"
457} {UTF-16}
458do_test enc2-7.3 {
459  sqlite3 db2 test.db
460  execsql {
461    PRAGMA encoding = 'UTF-8';
462    CREATE TABLE abc(a, b, c);
463  } db2
464} {}
465do_test enc2-7.4 {
466  execsql {
467    SELECT * FROM sqlite_master;
468  }
469} "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
470do_test enc2-7.5 {
471  execsql {
472    PRAGMA encoding;
473  }
474} {UTF-8}
475
476db close
477db2 close
478
479proc utf16 {utf8} {
480  set utf16 [encoding convertto unicode $utf8]
481  append utf16 "\x00\x00"
482  return $utf16
483}
484ifcapable {complete} {
485  do_test enc2-8.1 {
486    sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
487  } {1}
488  do_test enc2-8.2 {
489    sqlite3_complete16 [utf16 "SELECT * FROM"]
490  } {0}
491}
492
493# Test that the encoding of an empty database may still be set after the
494# (empty) schema has been initialized.
495forcedelete test.db
496do_test enc2-9.1 {
497  sqlite3 db test.db
498  execsql {
499    PRAGMA encoding = 'UTF-8';
500    PRAGMA encoding;
501  }
502} {UTF-8}
503do_test enc2-9.2 {
504  sqlite3 db test.db
505  execsql {
506    PRAGMA encoding = 'UTF-16le';
507    PRAGMA encoding;
508  }
509} {UTF-16le}
510do_test enc2-9.3 {
511  sqlite3 db test.db
512  execsql {
513    SELECT * FROM sqlite_master;
514    PRAGMA encoding = 'UTF-8';
515    PRAGMA encoding;
516  }
517} {UTF-8}
518do_test enc2-9.4 {
519  sqlite3 db test.db
520  execsql {
521    PRAGMA encoding = 'UTF-16le';
522    CREATE TABLE abc(a, b, c);
523    PRAGMA encoding;
524  }
525} {UTF-16le}
526do_test enc2-9.5 {
527  sqlite3 db test.db
528  execsql {
529    PRAGMA encoding = 'UTF-8';
530    PRAGMA encoding;
531  }
532} {UTF-16le}
533
534# Ticket #1987.
535# Disallow encoding changes once the encoding has been set.
536#
537do_test enc2-10.1 {
538  db close
539  forcedelete test.db test.db-journal
540  sqlite3 db test.db
541  db eval {
542    PRAGMA encoding=UTF16;
543    CREATE TABLE t1(a);
544    PRAGMA encoding=UTF8;
545    CREATE TABLE t2(b);
546  }
547  db close
548  sqlite3 db test.db
549  db eval {
550    SELECT name FROM sqlite_master
551  }
552} {t1 t2}
553
554# 2020-01-15 ticket a08879a4a476eea9
555# Do not allow a database connection encoding change unless *all*
556# attached databases are empty.
557#
558reset_db
559do_execsql_test enc2-11.10 {
560  PRAGMA encoding=UTF8;
561  CREATE TEMP TABLE t1(x);
562  INSERT INTO t1 VALUES('this is a test');
563  PRAGMA encoding=UTF16;
564  SELECT * FROM t1;
565} {{this is a test}}
566
567finish_test
568