# 2011 March 15
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#***********************************************************************
# This file implements regression tests for SQLite library.
#
# This file checks to make sure SQLite is able to gracefully
# handle malformed UTF-8.
#

set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Render every character of string $s as a literal "\uXXXX" escape
# (backslash, "u", then the character's code point as at least four
# upper-case hex digits) and return the concatenation.  An empty input
# yields an empty string.
proc utf8_to_ustr2 {s} {
  set r ""
  foreach i [split $s ""] {
    scan $i %c c
    append r [format \\u%04.4X $c]
  }
  set r
}

# Convert a string of hex digit pairs (e.g. "EFBFBD") into the same
# pairs each prefixed with "%" (e.g. "%EF%BF%BD").
# NOTE(review): the callers below embed the result in SQL passed to
# sqlite3_exec; presumably that test command decodes %HH into raw
# bytes - confirm against the test fixture source.
proc utf8_to_hstr {in} {
  regsub -all -- {(..)} $in {%[format "%s" \1]} out
  subst $out
}

# Convert a string of hex digit pairs into literal "\xHH" escape text
# (one escape per pair).  The escapes are returned as text, not as the
# bytes they denote.
proc utf8_to_xstr {in} {
  regsub -all -- {(..)} $in {\\\\x[format "%s" \1]} out
  subst $out
}

# Convert a string of hex digit pairs into literal "\uXXXX" escape text,
# one escape per pair, with each pair's value zero-padded to four
# upper-case hex digits.
proc utf8_to_ustr {in} {
  regsub -all -- {(..)} $in {\\\\u[format "%04.4X" 0x\1]} out
  subst $out
}

# Start from a fresh database file that uses the UTF-8 encoding.
do_test badutf2-1.0 {
  db close
  forcedelete test.db
  sqlite3 db test.db
  db eval "PRAGMA encoding = 'UTF-8'"
} {}

# Prepare a single-parameter statement that the 4.1.* cases below
# re-bind for every table row; it is finalized in badutf2-4.2.
do_test badutf2-4.0 {
  set S [sqlite3_prepare_v2 db "SELECT ?" -1 dummy]
  sqlite3_expired $S
} {0}

# Columns of the table driving the loop below:
#   i     test case number
#   len   byte count handed to sqlite3_bind_text
#   uval  hex digits of the input byte sequence
#   xstr  the same input written with \x escapes
#   ustr  expected utf8_to_ustr2 rendering of the bound text read back
#   u2u   expected result of utf8_to_utf8 on uval
foreach { i len uval xstr ustr u2u } {
1 1 00     \x00         {}         {}
2 1 01     \x01         "\\u0001"  01
3 1 3F     \x3F         "\\u003F"  3F
4 1 7F     \x7F         "\\u007F"  7F
5 1 80     \x80         "\\u0080"  C280
6 1 FF     \xFF         "\\u00FF"  C3BF
7 3 EFBFBD \xEF\xBF\xBD "\\uFFFD"  {}
} {

  set hstr [ utf8_to_hstr $uval ]

  ifcapable bloblit {
    # The string-literal comparisons are skipped for the NUL byte case.
    if {$hstr != "%00"} {
      do_test badutf2-2.1.$i {
        set sql "SELECT '$hstr'=CAST(x'$uval' AS text) AS x;"
        set res [ sqlite3_exec db $sql ]
        lindex [ lindex $res 1] 1
      } {1}
      do_test badutf2-2.2.$i {
        set sql "SELECT CAST('$hstr' AS blob)=x'$uval' AS x;"
        set res [ sqlite3_exec db $sql ]
        lindex [ lindex $res 1] 1
      } {1}
    }
    # The bytes must survive a cast to text ...
    do_test badutf2-2.3.$i {
      set sql "SELECT hex(CAST(x'$uval' AS text)) AS x;"
      set res [ sqlite3_exec db $sql ]
      lindex [ lindex $res 1] 1
    } $uval
    # ... and a cast to blob, unchanged.
    do_test badutf2-2.4.$i {
      set sql "SELECT hex(CAST(x'$uval' AS blob)) AS x;"
      set res [ sqlite3_exec db $sql ]
      lindex [ lindex $res 1] 1
    } $uval
  }

  if {$hstr != "%00"} {
    do_test badutf2-3.1.$i {
      set sql "SELECT hex('$hstr') AS x;"
      set res [ sqlite3_exec db $sql ]
      lindex [ lindex $res 1] 1
    } $uval
  }

  # Tcl 8.7 and later do automatic bad-utf8 correction for
  # characters 0x80 thru 0x9f so test case 5 does not work here.
  if {$i==5 && $tcl_version>=8.7} {
     # no-op
  } else {
    do_test badutf2-4.1.$i {
      sqlite3_reset $S
      sqlite3_bind_text $S 1 $xstr $len
      sqlite3_step $S
      utf8_to_ustr2 [ sqlite3_column_text $S 0 ]
    } $ustr
  }

  # utf8_to_utf8 is only exercised when the "debug" capability is
  # present.
  ifcapable debug {
    do_test badutf2-5.1.$i {
      utf8_to_utf8 $uval
    } $u2u
  }

}

do_test badutf2-4.2 {
  sqlite3_finalize $S
} {SQLITE_OK}


finish_test