1 /* 2 ** 2018-01-08 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains code to implement the sqlite3_normalize() function. 14 ** 15 ** char *sqlite3_normalize(const char *zSql); 16 ** 17 ** This function takes an SQL string as input and returns a "normalized" 18 ** version of that string in memory obtained from sqlite3_malloc64(). The 19 ** caller is responsible for ensuring that the returned memory is freed. 20 ** 21 ** If a memory allocation error occurs, this routine returns NULL. 22 ** 23 ** The normalization consists of the following transformations: 24 ** 25 ** (1) Convert every literal (string, blob literal, numeric constant, 26 ** or "NULL" constant) into a ? 27 ** 28 ** (2) Remove all superfluous whitespace, including comments. Change 29 ** all required whitespace to a single space character. 30 ** 31 ** (3) Lowercase all ASCII characters. 32 ** 33 ** (4) If an IN or NOT IN operator is followed by a list of 1 or more 34 ** values, convert that list into "(?,?,?)". 35 ** 36 ** The purpose of normalization is two-fold: 37 ** 38 ** (1) Sanitize queries by removing possibly sensitive information contained 39 ** in literals. 40 ** 41 ** (2) Identify structurally identical queries by comparing their 42 ** normalized forms. 43 */ 44 #include <sqlite3.h> 45 #include <string.h> 46 47 /* 48 ** Implementation note: 49 ** 50 ** Much of the tokenizer logic is copied out of the tokenize.c source file 51 ** of SQLite. This logic could be simplified for this particular application, 52 ** but that would impose a risk of introducing subtle errors. It is best to 53 ** keep the code as close to the original as possible. 54 */ 55 56 57 /* Character classes for tokenizing 58 ** 59 ** In the sqlite3GetToken() function, a switch() on aiClass[c] is implemented 60 ** using a lookup table, whereas a switch() directly on c uses a binary search. 61 ** The lookup table is much faster. To maximize speed, and to ensure that 62 ** a lookup table is used, all of the classes need to be small integers and 63 ** all of them need to be used within the switch. 64 */ 65 #define CC_X 0 /* The letter 'x', or start of BLOB literal */ 66 #define CC_KYWD 1 /* Alphabetics or '_'. Usable in a keyword */ 67 #define CC_ID 2 /* unicode characters usable in IDs */ 68 #define CC_DIGIT 3 /* Digits */ 69 #define CC_DOLLAR 4 /* '$' */ 70 #define CC_VARALPHA 5 /* '@', '#', ':'. Alphabetic SQL variables */ 71 #define CC_VARNUM 6 /* '?'. Numeric SQL variables */ 72 #define CC_SPACE 7 /* Space characters */ 73 #define CC_QUOTE 8 /* '"', '\'', or '`'. String literals, quoted ids */ 74 #define CC_QUOTE2 9 /* '['. [...] style quoted ids */ 75 #define CC_PIPE 10 /* '|'. Bitwise OR or concatenate */ 76 #define CC_MINUS 11 /* '-'. Minus or SQL-style comment */ 77 #define CC_LT 12 /* '<'. Part of < or <= or <> */ 78 #define CC_GT 13 /* '>'. Part of > or >= */ 79 #define CC_EQ 14 /* '='. Part of = or == */ 80 #define CC_BANG 15 /* '!'. Part of != */ 81 #define CC_SLASH 16 /* '/'. / or c-style comment */ 82 #define CC_LP 17 /* '(' */ 83 #define CC_RP 18 /* ')' */ 84 #define CC_SEMI 19 /* ';' */ 85 #define CC_PLUS 20 /* '+' */ 86 #define CC_STAR 21 /* '*' */ 87 #define CC_PERCENT 22 /* '%' */ 88 #define CC_COMMA 23 /* ',' */ 89 #define CC_AND 24 /* '&' */ 90 #define CC_TILDA 25 /* '~' */ 91 #define CC_DOT 26 /* '.' */ 92 #define CC_ILLEGAL 27 /* Illegal character */ 93 94 static const unsigned char aiClass[] = { 95 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ 96 /* 0x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 7, 7, 27, 7, 7, 27, 27, 97 /* 1x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 98 /* 2x */ 7, 15, 8, 5, 4, 22, 24, 8, 17, 18, 21, 20, 23, 11, 26, 16, 99 /* 3x */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 19, 12, 14, 13, 6, 100 /* 4x */ 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 101 /* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 9, 27, 27, 27, 1, 102 /* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 103 /* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 27, 10, 27, 25, 27, 104 /* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 105 /* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 106 /* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 107 /* Bx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 108 /* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 109 /* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 110 /* Ex */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 111 /* Fx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 112 }; 113 114 /* An array to map all upper-case characters into their corresponding 115 ** lower-case character. 116 ** 117 ** SQLite only considers US-ASCII (or EBCDIC) characters. We do not 118 ** handle case conversions for the UTF character set since the tables 119 ** involved are nearly as big or bigger than SQLite itself. 120 */ 121 const unsigned char sqlite3UpperToLower[] = { 122 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 123 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 124 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 125 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, 126 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, 127 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107, 128 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, 129 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, 130 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161, 131 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179, 132 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197, 133 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215, 134 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, 135 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, 136 252,253,254,255 137 }; 138 139 /* 140 ** The following 256 byte lookup table is used to support SQLites built-in 141 ** equivalents to the following standard library functions: 142 ** 143 ** isspace() 0x01 144 ** isalpha() 0x02 145 ** isdigit() 0x04 146 ** isalnum() 0x06 147 ** isxdigit() 0x08 148 ** toupper() 0x20 149 ** SQLite identifier character 0x40 150 ** Quote character 0x80 151 ** 152 ** Bit 0x20 is set if the mapped character requires translation to upper 153 ** case. i.e. if the character is a lower-case ASCII character. 154 ** If x is a lower-case ASCII character, then its upper-case equivalent 155 ** is (x - 0x20). Therefore toupper() can be implemented as: 156 ** 157 ** (x & ~(map[x]&0x20)) 158 ** 159 ** The equivalent of tolower() is implemented using the sqlite3UpperToLower[] 160 ** array. tolower() is used more often than toupper() by SQLite. 161 ** 162 ** Bit 0x40 is set if the character is non-alphanumeric and can be used in an 163 ** SQLite identifier. Identifiers are alphanumerics, "_", "$", and any 164 ** non-ASCII UTF character. Hence the test for whether or not a character is 165 ** part of an identifier is 0x46. 166 */ 167 const unsigned char sqlite3CtypeMap[256] = { 168 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 00..07 ........ */ 169 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, /* 08..0f ........ */ 170 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 10..17 ........ */ 171 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 18..1f ........ */ 172 0x01, 0x00, 0x80, 0x00, 0x40, 0x00, 0x00, 0x80, /* 20..27 !"#$%&' */ 173 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 28..2f ()*+,-./ */ 174 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, /* 30..37 01234567 */ 175 0x0c, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 38..3f 89:;<=>? */ 176 177 0x00, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x02, /* 40..47 @ABCDEFG */ 178 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 48..4f HIJKLMNO */ 179 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, /* 50..57 PQRSTUVW */ 180 0x02, 0x02, 0x02, 0x80, 0x00, 0x00, 0x00, 0x40, /* 58..5f XYZ[\]^_ */ 181 0x80, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x2a, 0x22, /* 60..67 `abcdefg */ 182 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 68..6f hijklmno */ 183 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, /* 70..77 pqrstuvw */ 184 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, /* 78..7f xyz{|}~. */ 185 186 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 80..87 ........ */ 187 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 88..8f ........ */ 188 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 90..97 ........ */ 189 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* 98..9f ........ */ 190 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a0..a7 ........ */ 191 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* a8..af ........ */ 192 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b0..b7 ........ */ 193 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* b8..bf ........ */ 194 195 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c0..c7 ........ */ 196 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* c8..cf ........ */ 197 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d0..d7 ........ */ 198 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* d8..df ........ */ 199 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e0..e7 ........ */ 200 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* e8..ef ........ */ 201 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, /* f0..f7 ........ */ 202 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 /* f8..ff ........ */ 203 }; 204 #define sqlite3Toupper(x) ((x)&~(sqlite3CtypeMap[(unsigned char)(x)]&0x20)) 205 #define sqlite3Isspace(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x01) 206 #define sqlite3Isalnum(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x06) 207 #define sqlite3Isalpha(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x02) 208 #define sqlite3Isdigit(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x04) 209 #define sqlite3Isxdigit(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x08) 210 #define sqlite3Tolower(x) (sqlite3UpperToLower[(unsigned char)(x)]) 211 #define sqlite3Isquote(x) (sqlite3CtypeMap[(unsigned char)(x)]&0x80) 212 213 214 /* 215 ** If X is a character that can be used in an identifier then 216 ** IdChar(X) will be true. Otherwise it is false. 217 ** 218 ** For ASCII, any character with the high-order bit set is 219 ** allowed in an identifier. For 7-bit characters, 220 ** sqlite3IsIdChar[X] must be 1. 221 ** 222 ** For EBCDIC, the rules are more complex but have the same 223 ** end result. 224 ** 225 ** Ticket #1066. the SQL standard does not allow '$' in the 226 ** middle of identifiers. But many SQL implementations do. 227 ** SQLite will allow '$' in identifiers for compatibility. 228 ** But the feature is undocumented. 229 */ 230 #define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) 231 232 /* 233 ** Ignore testcase() macros 234 */ 235 #define testcase(X) 236 237 /* 238 ** Token values 239 */ 240 #define TK_SPACE 0 241 #define TK_NAME 1 242 #define TK_LITERAL 2 243 #define TK_PUNCT 3 244 #define TK_ERROR 4 245 246 #define TK_MINUS TK_PUNCT 247 #define TK_LP TK_PUNCT 248 #define TK_RP TK_PUNCT 249 #define TK_SEMI TK_PUNCT 250 #define TK_PLUS TK_PUNCT 251 #define TK_STAR TK_PUNCT 252 #define TK_SLASH TK_PUNCT 253 #define TK_REM TK_PUNCT 254 #define TK_EQ TK_PUNCT 255 #define TK_LE TK_PUNCT 256 #define TK_NE TK_PUNCT 257 #define TK_LSHIFT TK_PUNCT 258 #define TK_LT TK_PUNCT 259 #define TK_GE TK_PUNCT 260 #define TK_RSHIFT TK_PUNCT 261 #define TK_GT TK_PUNCT 262 #define TK_GE TK_PUNCT 263 #define TK_BITOR TK_PUNCT 264 #define TK_CONCAT TK_PUNCT 265 #define TK_COMMA TK_PUNCT 266 #define TK_BITAND TK_PUNCT 267 #define TK_BITNOT TK_PUNCT 268 #define TK_STRING TK_LITERAL 269 #define TK_ID TK_NAME 270 #define TK_ILLEGAL TK_ERROR 271 #define TK_DOT TK_PUNCT 272 #define TK_INTEGER TK_LITERAL 273 #define TK_FLOAT TK_LITERAL 274 #define TK_VARIABLE TK_LITERAL 275 #define TK_BLOB TK_LITERAL 276 277 /* 278 ** Return the length (in bytes) of the token that begins at z[0]. 279 ** Store the token type in *tokenType before returning. 280 */ 281 static int sqlite3GetToken(const unsigned char *z, int *tokenType){ 282 int i, c; 283 switch( aiClass[*z] ){ /* Switch on the character-class of the first byte 284 ** of the token. See the comment on the CC_ defines 285 ** above. */ 286 case CC_SPACE: { 287 for(i=1; sqlite3Isspace(z[i]); i++){} 288 *tokenType = TK_SPACE; 289 return i; 290 } 291 case CC_MINUS: { 292 if( z[1]=='-' ){ 293 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 294 *tokenType = TK_SPACE; 295 return i; 296 } 297 *tokenType = TK_MINUS; 298 return 1; 299 } 300 case CC_LP: { 301 *tokenType = TK_LP; 302 return 1; 303 } 304 case CC_RP: { 305 *tokenType = TK_RP; 306 return 1; 307 } 308 case CC_SEMI: { 309 *tokenType = TK_SEMI; 310 return 1; 311 } 312 case CC_PLUS: { 313 *tokenType = TK_PLUS; 314 return 1; 315 } 316 case CC_STAR: { 317 *tokenType = TK_STAR; 318 return 1; 319 } 320 case CC_SLASH: { 321 if( z[1]!='*' || z[2]==0 ){ 322 *tokenType = TK_SLASH; 323 return 1; 324 } 325 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 326 if( c ) i++; 327 *tokenType = TK_SPACE; 328 return i; 329 } 330 case CC_PERCENT: { 331 *tokenType = TK_REM; 332 return 1; 333 } 334 case CC_EQ: { 335 *tokenType = TK_EQ; 336 return 1 + (z[1]=='='); 337 } 338 case CC_LT: { 339 if( (c=z[1])=='=' ){ 340 *tokenType = TK_LE; 341 return 2; 342 }else if( c=='>' ){ 343 *tokenType = TK_NE; 344 return 2; 345 }else if( c=='<' ){ 346 *tokenType = TK_LSHIFT; 347 return 2; 348 }else{ 349 *tokenType = TK_LT; 350 return 1; 351 } 352 } 353 case CC_GT: { 354 if( (c=z[1])=='=' ){ 355 *tokenType = TK_GE; 356 return 2; 357 }else if( c=='>' ){ 358 *tokenType = TK_RSHIFT; 359 return 2; 360 }else{ 361 *tokenType = TK_GT; 362 return 1; 363 } 364 } 365 case CC_BANG: { 366 if( z[1]!='=' ){ 367 *tokenType = TK_ILLEGAL; 368 return 1; 369 }else{ 370 *tokenType = TK_NE; 371 return 2; 372 } 373 } 374 case CC_PIPE: { 375 if( z[1]!='|' ){ 376 *tokenType = TK_BITOR; 377 return 1; 378 }else{ 379 *tokenType = TK_CONCAT; 380 return 2; 381 } 382 } 383 case CC_COMMA: { 384 *tokenType = TK_COMMA; 385 return 1; 386 } 387 case CC_AND: { 388 *tokenType = TK_BITAND; 389 return 1; 390 } 391 case CC_TILDA: { 392 *tokenType = TK_BITNOT; 393 return 1; 394 } 395 case CC_QUOTE: { 396 int delim = z[0]; 397 testcase( delim=='`' ); 398 testcase( delim=='\'' ); 399 testcase( delim=='"' ); 400 for(i=1; (c=z[i])!=0; i++){ 401 if( c==delim ){ 402 if( z[i+1]==delim ){ 403 i++; 404 }else{ 405 break; 406 } 407 } 408 } 409 if( c=='\'' ){ 410 *tokenType = TK_STRING; 411 return i+1; 412 }else if( c!=0 ){ 413 *tokenType = TK_ID; 414 return i+1; 415 }else{ 416 *tokenType = TK_ILLEGAL; 417 return i; 418 } 419 } 420 case CC_DOT: { 421 if( !sqlite3Isdigit(z[1]) ){ 422 *tokenType = TK_DOT; 423 return 1; 424 } 425 /* If the next character is a digit, this is a floating point 426 ** number that begins with ".". Fall thru into the next case */ 427 } 428 case CC_DIGIT: { 429 *tokenType = TK_INTEGER; 430 if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ 431 for(i=3; sqlite3Isxdigit(z[i]); i++){} 432 return i; 433 } 434 for(i=0; sqlite3Isdigit(z[i]); i++){} 435 if( z[i]=='.' ){ 436 i++; 437 while( sqlite3Isdigit(z[i]) ){ i++; } 438 *tokenType = TK_FLOAT; 439 } 440 if( (z[i]=='e' || z[i]=='E') && 441 ( sqlite3Isdigit(z[i+1]) 442 || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) 443 ) 444 ){ 445 i += 2; 446 while( sqlite3Isdigit(z[i]) ){ i++; } 447 *tokenType = TK_FLOAT; 448 } 449 while( IdChar(z[i]) ){ 450 *tokenType = TK_ILLEGAL; 451 i++; 452 } 453 return i; 454 } 455 case CC_QUOTE2: { 456 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 457 *tokenType = c==']' ? TK_ID : TK_ILLEGAL; 458 return i; 459 } 460 case CC_VARNUM: { 461 *tokenType = TK_VARIABLE; 462 for(i=1; sqlite3Isdigit(z[i]); i++){} 463 return i; 464 } 465 case CC_DOLLAR: 466 case CC_VARALPHA: { 467 int n = 0; 468 testcase( z[0]=='$' ); testcase( z[0]=='@' ); 469 testcase( z[0]==':' ); testcase( z[0]=='#' ); 470 *tokenType = TK_VARIABLE; 471 for(i=1; (c=z[i])!=0; i++){ 472 if( IdChar(c) ){ 473 n++; 474 }else if( c=='(' && n>0 ){ 475 do{ 476 i++; 477 }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); 478 if( c==')' ){ 479 i++; 480 }else{ 481 *tokenType = TK_ILLEGAL; 482 } 483 break; 484 }else if( c==':' && z[i+1]==':' ){ 485 i++; 486 }else{ 487 break; 488 } 489 } 490 if( n==0 ) *tokenType = TK_ILLEGAL; 491 return i; 492 } 493 case CC_KYWD: { 494 for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} 495 if( IdChar(z[i]) ){ 496 /* This token started out using characters that can appear in keywords, 497 ** but z[i] is a character not allowed within keywords, so this must 498 ** be an identifier instead */ 499 i++; 500 break; 501 } 502 *tokenType = TK_ID; 503 return i; 504 } 505 case CC_X: { 506 testcase( z[0]=='x' ); testcase( z[0]=='X' ); 507 if( z[1]=='\'' ){ 508 *tokenType = TK_BLOB; 509 for(i=2; sqlite3Isxdigit(z[i]); i++){} 510 if( z[i]!='\'' || i%2 ){ 511 *tokenType = TK_ILLEGAL; 512 while( z[i] && z[i]!='\'' ){ i++; } 513 } 514 if( z[i] ) i++; 515 return i; 516 } 517 /* If it is not a BLOB literal, then it must be an ID, since no 518 ** SQL keywords start with the letter 'x'. Fall through */ 519 } 520 case CC_ID: { 521 i = 1; 522 break; 523 } 524 default: { 525 *tokenType = TK_ILLEGAL; 526 return 1; 527 } 528 } 529 while( IdChar(z[i]) ){ i++; } 530 *tokenType = TK_ID; 531 return i; 532 } 533 534 char *sqlite3_normalize(const char *zSql){ 535 char *z; /* The output string */ 536 sqlite3_int64 nZ; /* Size of the output string in bytes */ 537 sqlite3_int64 nSql; /* Size of the input string in bytes */ 538 int i; /* Next character to read from zSql[] */ 539 int j; /* Next slot to fill in on z[] */ 540 int tokenType; /* Type of the next token */ 541 int n; /* Size of the next token */ 542 int k; /* Loop counter */ 543 544 nSql = strlen(zSql); 545 nZ = nSql; 546 z = sqlite3_malloc64( nZ+2 ); 547 if( z==0 ) return 0; 548 for(i=j=0; zSql[i]; i += n){ 549 n = sqlite3GetToken((unsigned char*)zSql+i, &tokenType); 550 switch( tokenType ){ 551 case TK_SPACE: { 552 break; 553 } 554 case TK_ERROR: { 555 sqlite3_free(z); 556 return 0; 557 } 558 case TK_LITERAL: { 559 z[j++] = '?'; 560 break; 561 } 562 case TK_PUNCT: 563 case TK_NAME: { 564 if( j>0 && IdChar(z[j-1]) && IdChar(zSql[i]) ) z[j++] = ' '; 565 for(k=0; k<n; k++){ 566 z[j++] = sqlite3Tolower(zSql[i+k]); 567 } 568 break; 569 } 570 } 571 } 572 while( j>0 && z[j-1]==' ' ){ j--; } 573 if( i>0 && z[j-1]!=';' ){ z[j++] = ';'; } 574 z[j] = 0; 575 return z; 576 } 577 578 #ifdef NORMALIZE_TEST 579 #include <stdio.h> 580 #include <stdlib.h> 581 582 static void normalizeFile(char *zIn){ 583 int i; 584 if( zIn==0 ) return; 585 for(i=0; zIn[i]; i++){ 586 char cSaved; 587 if( zIn[i]!=';' ) continue; 588 cSaved = zIn[i+1]; 589 zIn[i+1] = 0; 590 if( sqlite3_complete(zIn) ){ 591 char *zOut = sqlite3_normalize(zIn); 592 if( zOut ){ 593 printf("%s\n", zOut); 594 sqlite3_free(zOut); 595 }else{ 596 fprintf(stderr, "ERROR: %s\n", zIn); 597 } 598 zIn[i+1] = cSaved; 599 zIn += i+1; 600 i = -1; 601 }else{ 602 zIn[i+1] = cSaved; 603 } 604 } 605 } 606 607 int main(int argc, char **argv){ 608 int i; 609 FILE *in; 610 char *zBuf = 0; 611 sqlite3_int64 sz, got; 612 613 for(i=1; i<argc; i++){ 614 in = fopen(argv[i], "rb"); 615 if( in==0 ){ 616 fprintf(stderr, "cannot open \"%s\"\n", argv[i]); 617 continue; 618 } 619 fseek(in, 0, SEEK_END); 620 sz = ftell(in); 621 rewind(in); 622 zBuf = sqlite3_realloc64(zBuf, sz+1); 623 if( zBuf==0 ){ 624 fprintf(stderr, "failed to malloc for %lld bytes\n", sz); 625 exit(1); 626 } 627 got = fread(zBuf, 1, sz, in); 628 fclose(in); 629 if( got!=sz ){ 630 fprintf(stderr, "only able to read %lld of %lld bytes from \"%s\"\n", 631 got, sz, argv[i]); 632 }else{ 633 zBuf[got] = 0; 634 normalizeFile(zBuf); 635 } 636 } 637 sqlite3_free(zBuf); 638 } 639 #endif /* NORMALIZE_TEST */ 640