3 # The author disclaims copyright to this source code. In place of
4 # a legal notice, here is a blessing:
6 # May you do good and not evil.
7 # May you find forgiveness for yourself and forgive others.
8 # May you share freely, never taking more than you give.
10 #***********************************************************************
11 # This file implements regression tests for SQLite library. The focus of
12 # this file is testing the SQLite routines used for converting between the
13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and UTF-16be).
16 # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
18 set testdir [file dirname $argv0]
19 source $testdir/tester.tcl
21 # If UTF16 support is disabled, ignore the tests in this file
28 # The rough organisation of tests in this file is:
30 # enc2.1.*: Simple tests with a UTF-8 db.
31 # enc2.2.*: Simple tests with a UTF-16LE db.
32 # enc2.3.*: Simple tests with a UTF-16BE db.
33 # enc2.4.*: Test that attached databases must have the same text encoding
34 # as the main database.
35 # enc2.5.*: Test the behavior of the library when a collation sequence is
36 # not available for the most desirable text encoding.
37 # enc2.6.*: Similar test for user functions.
38 # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
39 # wrong text encoding for the database.
40 # enc2.8.*: Test sqlite3_complete16()
45 # Return the UTF-8 representation of the supplied UTF-16 string $str.
47 # If $str ends in two 0x00 0x00 bytes, knock these off before
48 # converting to UTF-8 using TCL.
49 binary scan $str \c* vals
50 if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
51 set str [binary format \c* [lrange $vals 0 end-2]]
54 set r [encoding convertfrom unicode $str]
59 # This proc contains all the tests in this file. It is run
60 # three times. Each time the file 'test.db' contains a database
61 # with the following contents:
63 CREATE TABLE t1(a PRIMARY KEY, b, c);
64 INSERT INTO t1 VALUES('one', 'I', 1);
66 # This proc tests that we can open and manipulate the test.db
67 # database, and that it is possible to retrieve values in
68 # various text encodings.
70 proc run_test_script {t enc} {
72 # Open the database and pull out a (the) row.
74 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
75 execsql {SELECT * FROM t1}
80 execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
81 execsql {SELECT * FROM t1}
87 INSERT INTO t1 VALUES('three','III',3);
88 INSERT INTO t1 VALUES('four','IV',4);
89 INSERT INTO t1 VALUES('five','V',5);
91 execsql {SELECT * FROM t1}
92 } {one I 1 two II 2 three III 3 four IV 4 five V 5}
97 SELECT * FROM t1 WHERE a = 'one';
102 SELECT * FROM t1 WHERE a = 'four';
108 SELECT * FROM t1 WHERE a IN ('one', 'two');
113 # Now check that we can retrieve data in both UTF-16 and UTF-8
115 set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
117 sqlite3_column_text $STMT 0
122 utf8 [sqlite3_column_text16 $STMT 0]
126 sqlite3_finalize $STMT
134 db eval {PRAGMA encoding}
139 # The three unicode encodings understood by SQLite.
140 set encodings [list UTF-8 UTF-16le UTF-16be]
142 set sqlite_os_trace 0
144 foreach enc $encodings {
147 db eval "PRAGMA encoding = \"$enc\""
149 do_test enc2-$i.0.1 {
150 db eval {PRAGMA encoding}
152 do_test enc2-$i.0.2 {
153 db eval {PRAGMA encoding=UTF8}
154 db eval {PRAGMA encoding}
156 do_test enc2-$i.0.3 {
157 db eval {PRAGMA encoding=UTF16le}
158 db eval {PRAGMA encoding}
160 do_test enc2-$i.0.4 {
161 db eval {PRAGMA encoding=UTF16be}
162 db eval {PRAGMA encoding}
166 run_test_script enc2-$i $enc
171 # Test that it is an error to try to attach a database with a different
172 # encoding to the main database.
177 db eval "PRAGMA encoding = 'UTF-8'"
178 db eval "CREATE TABLE abc(a, b, c);"
183 db2 eval "PRAGMA encoding = 'UTF-16'"
184 db2 eval "CREATE TABLE abc(a, b, c);"
188 ATTACH 'test2.db' as aux;
190 } {1 {attached databases must use the same text encoding as main database}}
195 # The following tests - enc2-5.* - test that SQLite selects the correct
196 # collation sequence when more than one is available.
198 set ::values [list one two three four five]
199 set ::test_collate_enc INVALID
200 proc test_collate {enc lhs rhs} {
201 set ::test_collate_enc $enc
202 set l [lsearch -exact $::values $lhs]
203 set r [lsearch -exact $::values $rhs]
204 set res [expr $l - $r]
205 # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
210 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
214 INSERT INTO t5 VALUES('one');
215 INSERT INTO t5 VALUES('two');
216 INSERT INTO t5 VALUES('five');
217 INSERT INTO t5 VALUES('three');
218 INSERT INTO t5 VALUES('four');
222 add_test_collate $DB 1 1 1
223 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
224 lappend res $::test_collate_enc
225 } {one two three four five UTF-8}
227 add_test_collate $DB 0 1 0
228 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
229 lappend res $::test_collate_enc
230 } {one two three four five UTF-16LE}
232 add_test_collate $DB 0 0 1
233 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
234 lappend res $::test_collate_enc
235 } {one two three four five UTF-16BE}
239 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
240 execsql {pragma encoding = 'UTF-16LE'}
244 INSERT INTO t5 VALUES('one');
245 INSERT INTO t5 VALUES('two');
246 INSERT INTO t5 VALUES('five');
247 INSERT INTO t5 VALUES('three');
248 INSERT INTO t5 VALUES('four');
252 add_test_collate $DB 1 1 1
253 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
254 lappend res $::test_collate_enc
255 } {one two three four five UTF-16LE}
257 add_test_collate $DB 1 0 1
258 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
259 lappend res $::test_collate_enc
260 } {one two three four five UTF-16BE}
262 add_test_collate $DB 1 0 0
263 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
264 lappend res $::test_collate_enc
265 } {one two three four five UTF-8}
269 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
270 execsql {pragma encoding = 'UTF-16BE'}
274 INSERT INTO t5 VALUES('one');
275 INSERT INTO t5 VALUES('two');
276 INSERT INTO t5 VALUES('five');
277 INSERT INTO t5 VALUES('three');
278 INSERT INTO t5 VALUES('four');
282 add_test_collate $DB 1 1 1
283 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
284 lappend res $::test_collate_enc
285 } {one two three four five UTF-16BE}
287 add_test_collate $DB 1 1 0
288 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
289 lappend res $::test_collate_enc
290 } {one two three four five UTF-16LE}
292 add_test_collate $DB 1 0 0
293 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
294 lappend res $::test_collate_enc
295 } {one two three four five UTF-8}
297 # Also test that a UTF-16 collation factory works.
299 add_test_collate $DB 0 0 0
301 SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
303 } {1 {no such collation sequence: test_collate}}
305 add_test_collate_needed $DB
306 set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
307 lappend res $::test_collate_enc
308 } {one two three four five UTF-16BE}
310 set ::sqlite_last_needed_collation
317 sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
318 add_test_collate_needed $::DB
319 set ::sqlite_last_needed_collation
322 execsql {CREATE TABLE t1(a varchar collate test_collate);}
325 set ::sqlite_last_needed_collation
328 # The following tests - enc2-6.* - test that SQLite selects the correct
329 # user function when more than one is available.
331 proc test_function {enc arg} {
337 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
338 execsql {pragma encoding = 'UTF-8'}
342 INSERT INTO t5 VALUES('one');
346 add_test_function $DB 1 1 1
348 SELECT test_function('sqlite')
352 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
354 add_test_function $DB 0 1 0
356 SELECT test_function('sqlite')
358 } {{UTF-16LE sqlite}}
360 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
362 add_test_function $DB 0 0 1
364 SELECT test_function('sqlite')
366 } {{UTF-16BE sqlite}}
370 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
371 execsql {pragma encoding = 'UTF-16LE'}
375 INSERT INTO t5 VALUES('sqlite');
379 add_test_function $DB 1 1 1
381 SELECT test_function('sqlite')
383 } {{UTF-16LE sqlite}}
385 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
387 add_test_function $DB 0 1 0
389 SELECT test_function('sqlite')
391 } {{UTF-16LE sqlite}}
393 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
395 add_test_function $DB 0 0 1
397 SELECT test_function('sqlite')
399 } {{UTF-16BE sqlite}}
403 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
404 execsql {pragma encoding = 'UTF-16BE'}
408 INSERT INTO t5 VALUES('sqlite');
412 add_test_function $DB 1 1 1
414 SELECT test_function('sqlite')
416 } {{UTF-16BE sqlite}}
418 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
420 add_test_function $DB 0 1 0
422 SELECT test_function('sqlite')
424 } {{UTF-16LE sqlite}}
426 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
428 add_test_function $DB 0 0 1
430 SELECT test_function('sqlite')
432 } {{UTF-16BE sqlite}}
438 # The following tests - enc2-7.* - function as follows:
440 # 1: Open an empty database file assuming UTF-16 encoding.
441 # 2: Open the same database with a different handle assuming UTF-8. Create
442 # a table using this handle.
443 # 3: Read the sqlite_master table from the first handle.
444 # 4: Ensure the first handle recognises the database encoding is UTF-8.
449 PRAGMA encoding = 'UTF-16';
450 SELECT * FROM sqlite_master;
457 string range $enc 0 end-2 ;# Chop off the "le" or "be"
462 PRAGMA encoding = 'UTF-8';
463 CREATE TABLE abc(a, b, c);
468 SELECT * FROM sqlite_master;
470 } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
481 set utf16 [encoding convertto unicode $utf8]
482 append utf16 "\x00\x00"
485 ifcapable {complete} {
487 sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
490 sqlite3_complete16 [utf16 "SELECT * FROM"]
494 # Test that the encoding of an empty database may still be set after the
495 # (empty) schema has been initialized.
500 PRAGMA encoding = 'UTF-8';
507 PRAGMA encoding = 'UTF-16le';
514 SELECT * FROM sqlite_master;
515 PRAGMA encoding = 'UTF-8';
522 PRAGMA encoding = 'UTF-16le';
523 CREATE TABLE abc(a, b, c);
530 PRAGMA encoding = 'UTF-8';
536 # Disallow encoding changes once the encoding has been set.
540 forcedelete test.db test.db-journal
543 PRAGMA encoding=UTF16;
545 PRAGMA encoding=UTF8;
551 SELECT name FROM sqlite_master