1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** Code for testing the utf.c module in SQLite. This code 13 ** is not included in the SQLite library. It is used for automated 14 ** testing of the SQLite library. Specifically, the code in this file 15 ** is used for testing the SQLite routines for converting between 16 ** the various supported unicode encodings. 17 ** 18 ** $Id: test5.c,v 1.9 2004/06/09 09:55:19 danielk1977 Exp $ 19 */ 20 #include "sqliteInt.h" 21 #include "vdbeInt.h" 22 #include "os.h" /* to get SQLITE_BIGENDIAN */ 23 #include "tcl.h" 24 #include <stdlib.h> 25 #include <string.h> 26 27 /* 28 ** Return the number of bytes up to and including the first pair of 29 ** 0x00 bytes in *pStr. 30 */ 31 static int utf16_length(const unsigned char *pZ){ 32 const unsigned char *pC1 = pZ; 33 const unsigned char *pC2 = pZ+1; 34 while( *pC1 || *pC2 ){ 35 pC1 += 2; 36 pC2 += 2; 37 } 38 return (pC1-pZ)+2; 39 } 40 41 /* 42 ** tclcmd: sqlite_utf8to16le STRING 43 ** title: Convert STRING from utf-8 to utf-16le 44 ** 45 ** Return the utf-16le encoded string 46 */ 47 static int sqlite_utf8to16le( 48 void * clientData, 49 Tcl_Interp *interp, 50 int objc, 51 Tcl_Obj *CONST objv[] 52 ){ 53 unsigned char *out; 54 unsigned char *in; 55 Tcl_Obj *res; 56 57 if( objc!=2 ){ 58 Tcl_AppendResult(interp, "wrong # args: should be \"", 59 Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0); 60 return TCL_ERROR; 61 } 62 63 in = Tcl_GetString(objv[1]); 64 out = (unsigned char *)sqlite3utf8to16le(in, -1); 65 res = Tcl_NewByteArrayObj(out, utf16_length(out)); 66 sqliteFree(out); 67 68 Tcl_SetObjResult(interp, res); 69 70 return TCL_OK; 71 } 72 73 /* 74 ** tclcmd: sqlite_utf8to16be STRING 75 ** title: Convert STRING from utf-8 to utf-16be 76 ** 77 ** Return the utf-16be encoded string 78 */ 79 static int sqlite_utf8to16be( 80 void * clientData, 81 Tcl_Interp *interp, 82 int objc, 83 Tcl_Obj *CONST objv[] 84 ){ 85 unsigned char *out; 86 unsigned char *in; 87 Tcl_Obj *res; 88 89 if( objc!=2 ){ 90 Tcl_AppendResult(interp, "wrong # args: should be \"", 91 Tcl_GetStringFromObj(objv[0], 0), "<utf-8 encoded-string>", 0); 92 return TCL_ERROR; 93 } 94 95 in = Tcl_GetByteArrayFromObj(objv[1], 0); 96 in = Tcl_GetString(objv[1]); 97 out = (unsigned char *)sqlite3utf8to16be(in, -1); 98 res = Tcl_NewByteArrayObj(out, utf16_length(out)); 99 sqliteFree(out); 100 101 Tcl_SetObjResult(interp, res); 102 103 return TCL_OK; 104 } 105 106 /* 107 ** tclcmd: sqlite_utf16to16le STRING 108 ** title: Convert STRING from utf-16 in native byte order to utf-16le 109 ** 110 ** Return the utf-16le encoded string. If the input string contains 111 ** a byte-order mark, then the byte order mark should override the 112 ** native byte order. 113 */ 114 static int sqlite_utf16to16le( 115 void * clientData, 116 Tcl_Interp *interp, 117 int objc, 118 Tcl_Obj *CONST objv[] 119 ){ 120 unsigned char *out; 121 unsigned char *in; 122 int in_len; 123 Tcl_Obj *res; 124 125 if( objc!=2 ){ 126 Tcl_AppendResult(interp, "wrong # args: should be \"", 127 Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0); 128 return TCL_ERROR; 129 } 130 131 in = Tcl_GetByteArrayFromObj(objv[1], &in_len); 132 out = (unsigned char *)sqliteMalloc(in_len); 133 memcpy(out, in, in_len); 134 135 sqlite3utf16to16le(out, -1); 136 res = Tcl_NewByteArrayObj(out, utf16_length(out)); 137 sqliteFree(out); 138 139 Tcl_SetObjResult(interp, res); 140 141 return TCL_OK; 142 } 143 144 /* 145 ** tclcmd: sqlite_utf16to16be STRING 146 ** title: Convert STRING from utf-16 in native byte order to utf-16be 147 ** 148 ** Return the utf-16be encoded string. If the input string contains 149 ** a byte-order mark, then the byte order mark should override the 150 ** native byte order. 151 */ 152 static int sqlite_utf16to16be( 153 void * clientData, 154 Tcl_Interp *interp, 155 int objc, 156 Tcl_Obj *CONST objv[] 157 ){ 158 unsigned char *out; 159 unsigned char *in; 160 int in_len; 161 Tcl_Obj *res; 162 163 if( objc!=2 ){ 164 Tcl_AppendResult(interp, "wrong # args: should be \"", 165 Tcl_GetStringFromObj(objv[0], 0), "<utf-16 encoded-string>", 0); 166 return TCL_ERROR; 167 } 168 169 in = Tcl_GetByteArrayFromObj(objv[1], &in_len); 170 out = (unsigned char *)sqliteMalloc(in_len); 171 memcpy(out, in, in_len); 172 173 sqlite3utf16to16be(out, -1); 174 res = Tcl_NewByteArrayObj(out, utf16_length(out)); 175 sqliteFree(out); 176 177 Tcl_SetObjResult(interp, res); 178 179 return TCL_OK; 180 } 181 182 /* 183 ** tclcmd: sqlite_utf16to8 STRING 184 ** title: Convert STRING from utf-16 in native byte order to utf-8 185 ** 186 ** Return the utf-8 encoded string. If the input string contains 187 ** a byte-order mark, then the byte order mark should override the 188 ** native byte order. 189 */ 190 static int sqlite_utf16to8( 191 void * clientData, 192 Tcl_Interp *interp, 193 int objc, 194 Tcl_Obj *CONST objv[] 195 ){ 196 unsigned char *out; 197 unsigned char *in; 198 Tcl_Obj *res; 199 200 if( objc!=2 ){ 201 Tcl_AppendResult(interp, "wrong # args: should be \"", 202 Tcl_GetStringFromObj(objv[0], 0), " <utf-16 encoded-string>", 0); 203 return TCL_ERROR; 204 } 205 206 in = Tcl_GetByteArrayFromObj(objv[1], 0); 207 out = sqlite3utf16to8(in, -1, SQLITE_BIGENDIAN); 208 res = Tcl_NewByteArrayObj(out, strlen(out)+1); 209 sqliteFree(out); 210 211 Tcl_SetObjResult(interp, res); 212 213 return TCL_OK; 214 } 215 216 /* 217 ** The first argument is a TCL UTF-8 string. Return the byte array 218 ** object with the encoded representation of the string, including 219 ** the NULL terminator. 220 */ 221 static int binarize( 222 void * clientData, 223 Tcl_Interp *interp, 224 int objc, 225 Tcl_Obj *CONST objv[] 226 ){ 227 int len; 228 char *bytes; 229 Tcl_Obj *pRet; 230 assert(objc==2); 231 232 bytes = Tcl_GetStringFromObj(objv[1], &len); 233 pRet = Tcl_NewByteArrayObj(bytes, len+1); 234 Tcl_SetObjResult(interp, pRet); 235 return TCL_OK; 236 } 237 238 /* 239 ** Usage: test_value_overhead <repeat-count> <do-calls>. 240 ** 241 ** This routine is used to test the overhead of calls to 242 ** sqlite3_value_text(), on a value that contains a UTF-8 string. The idea 243 ** is to figure out whether or not it is a problem to use sqlite3_value 244 ** structures with collation sequence functions. 245 ** 246 ** If <do-calls> is 0, then the calls to sqlite3_value_text() are not 247 ** actually made. 248 */ 249 static int test_value_overhead( 250 void * clientData, 251 Tcl_Interp *interp, 252 int objc, 253 Tcl_Obj *CONST objv[] 254 ){ 255 int do_calls; 256 int repeat_count; 257 int i; 258 Mem val; 259 const char *zVal; 260 261 if( objc!=3 ){ 262 Tcl_AppendResult(interp, "wrong # args: should be \"", 263 Tcl_GetStringFromObj(objv[0], 0), " <repeat-count> <do-calls>", 0); 264 return TCL_ERROR; 265 } 266 267 if( Tcl_GetIntFromObj(interp, objv[1], &repeat_count) ) return TCL_ERROR; 268 if( Tcl_GetIntFromObj(interp, objv[2], &do_calls) ) return TCL_ERROR; 269 270 val.flags = MEM_Str|MEM_Term|MEM_Static; 271 val.z = "hello world"; 272 val.type = SQLITE_TEXT; 273 val.enc = TEXT_Utf8; 274 275 for(i=0; i<repeat_count; i++){ 276 if( do_calls ){ 277 zVal = sqlite3_value_text(&val); 278 } 279 } 280 281 return TCL_OK; 282 } 283 284 285 /* 286 ** Register commands with the TCL interpreter. 287 */ 288 int Sqlitetest5_Init(Tcl_Interp *interp){ 289 static struct { 290 char *zName; 291 Tcl_ObjCmdProc *xProc; 292 } aCmd[] = { 293 { "sqlite_utf16to8", (Tcl_ObjCmdProc*)sqlite_utf16to8 }, 294 { "sqlite_utf8to16le", (Tcl_ObjCmdProc*)sqlite_utf8to16le }, 295 { "sqlite_utf8to16be", (Tcl_ObjCmdProc*)sqlite_utf8to16be }, 296 { "sqlite_utf16to16le", (Tcl_ObjCmdProc*)sqlite_utf16to16le }, 297 { "sqlite_utf16to16be", (Tcl_ObjCmdProc*)sqlite_utf16to16be }, 298 { "binarize", (Tcl_ObjCmdProc*)binarize }, 299 { "test_value_overhead", (Tcl_ObjCmdProc*)test_value_overhead }, 300 }; 301 int i; 302 for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){ 303 Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0); 304 } 305 return SQLITE_OK; 306 } 307 308