1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** An tokenizer for SQL 13 ** 14 ** This file contains C code that splits an SQL input string up into 15 ** individual tokens and sends those tokens one-by-one over to the 16 ** parser for analysis. 17 ** 18 ** $Id: tokenize.c,v 1.129 2007/05/15 14:34:32 drh Exp $ 19 */ 20 #include "sqliteInt.h" 21 #include "os.h" 22 #include <ctype.h> 23 #include <stdlib.h> 24 25 /* 26 ** The charMap() macro maps alphabetic characters into their 27 ** lower-case ASCII equivalent. On ASCII machines, this is just 28 ** an upper-to-lower case map. On EBCDIC machines we also need 29 ** to adjust the encoding. Only alphabetic characters and underscores 30 ** need to be translated. 31 */ 32 #ifdef SQLITE_ASCII 33 # define charMap(X) sqlite3UpperToLower[(unsigned char)X] 34 #endif 35 #ifdef SQLITE_EBCDIC 36 # define charMap(X) ebcdicToAscii[(unsigned char)X] 37 const unsigned char ebcdicToAscii[] = { 38 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ 46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 47 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ 48 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ 49 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ 50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 51 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ 52 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ 53 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ 54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ 55 }; 56 #endif 57 58 /* 59 ** The sqlite3KeywordCode function looks up an identifier to determine if 60 ** it is a keyword. If it is a keyword, the token code of that keyword is 61 ** returned. If the input is not a keyword, TK_ID is returned. 62 ** 63 ** The implementation of this routine was generated by a program, 64 ** mkkeywordhash.h, located in the tool subdirectory of the distribution. 65 ** The output of the mkkeywordhash.c program is written into a file 66 ** named keywordhash.h and then included into this source file by 67 ** the #include below. 68 */ 69 #include "keywordhash.h" 70 71 72 /* 73 ** If X is a character that can be used in an identifier then 74 ** IdChar(X) will be true. Otherwise it is false. 75 ** 76 ** For ASCII, any character with the high-order bit set is 77 ** allowed in an identifier. For 7-bit characters, 78 ** sqlite3IsIdChar[X] must be 1. 79 ** 80 ** For EBCDIC, the rules are more complex but have the same 81 ** end result. 82 ** 83 ** Ticket #1066. the SQL standard does not allow '$' in the 84 ** middle of identfiers. But many SQL implementations do. 85 ** SQLite will allow '$' in identifiers for compatibility. 86 ** But the feature is undocumented. 87 */ 88 #ifdef SQLITE_ASCII 89 const char sqlite3IsIdChar[] = { 90 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 91 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ 93 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ 95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ 96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ 97 }; 98 #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20])) 99 #endif 100 #ifdef SQLITE_EBCDIC 101 const char sqlite3IsIdChar[] = { 102 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 103 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ 104 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ 105 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ 106 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 107 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ 108 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ 109 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ 110 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 111 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ 112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ 113 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ 114 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ 115 }; 116 #define IdChar(C) (((c=C)>=0x42 && sqlite3IsIdChar[c-0x40])) 117 #endif 118 119 120 /* 121 ** Return the length of the token that begins at z[0]. 122 ** Store the token type in *tokenType before returning. 123 */ 124 static int getToken(const unsigned char *z, int *tokenType){ 125 int i, c; 126 switch( *z ){ 127 case ' ': case '\t': case '\n': case '\f': case '\r': { 128 for(i=1; isspace(z[i]); i++){} 129 *tokenType = TK_SPACE; 130 return i; 131 } 132 case '-': { 133 if( z[1]=='-' ){ 134 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 135 *tokenType = TK_COMMENT; 136 return i; 137 } 138 *tokenType = TK_MINUS; 139 return 1; 140 } 141 case '(': { 142 *tokenType = TK_LP; 143 return 1; 144 } 145 case ')': { 146 *tokenType = TK_RP; 147 return 1; 148 } 149 case ';': { 150 *tokenType = TK_SEMI; 151 return 1; 152 } 153 case '+': { 154 *tokenType = TK_PLUS; 155 return 1; 156 } 157 case '*': { 158 *tokenType = TK_STAR; 159 return 1; 160 } 161 case '/': { 162 if( z[1]!='*' || z[2]==0 ){ 163 *tokenType = TK_SLASH; 164 return 1; 165 } 166 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 167 if( c ) i++; 168 *tokenType = TK_COMMENT; 169 return i; 170 } 171 case '%': { 172 *tokenType = TK_REM; 173 return 1; 174 } 175 case '=': { 176 *tokenType = TK_EQ; 177 return 1 + (z[1]=='='); 178 } 179 case '<': { 180 if( (c=z[1])=='=' ){ 181 *tokenType = TK_LE; 182 return 2; 183 }else if( c=='>' ){ 184 *tokenType = TK_NE; 185 return 2; 186 }else if( c=='<' ){ 187 *tokenType = TK_LSHIFT; 188 return 2; 189 }else{ 190 *tokenType = TK_LT; 191 return 1; 192 } 193 } 194 case '>': { 195 if( (c=z[1])=='=' ){ 196 *tokenType = TK_GE; 197 return 2; 198 }else if( c=='>' ){ 199 *tokenType = TK_RSHIFT; 200 return 2; 201 }else{ 202 *tokenType = TK_GT; 203 return 1; 204 } 205 } 206 case '!': { 207 if( z[1]!='=' ){ 208 *tokenType = TK_ILLEGAL; 209 return 2; 210 }else{ 211 *tokenType = TK_NE; 212 return 2; 213 } 214 } 215 case '|': { 216 if( z[1]!='|' ){ 217 *tokenType = TK_BITOR; 218 return 1; 219 }else{ 220 *tokenType = TK_CONCAT; 221 return 2; 222 } 223 } 224 case ',': { 225 *tokenType = TK_COMMA; 226 return 1; 227 } 228 case '&': { 229 *tokenType = TK_BITAND; 230 return 1; 231 } 232 case '~': { 233 *tokenType = TK_BITNOT; 234 return 1; 235 } 236 case '`': 237 case '\'': 238 case '"': { 239 int delim = z[0]; 240 for(i=1; (c=z[i])!=0; i++){ 241 if( c==delim ){ 242 if( z[i+1]==delim ){ 243 i++; 244 }else{ 245 break; 246 } 247 } 248 } 249 if( c ){ 250 *tokenType = TK_STRING; 251 return i+1; 252 }else{ 253 *tokenType = TK_ILLEGAL; 254 return i; 255 } 256 } 257 case '.': { 258 #ifndef SQLITE_OMIT_FLOATING_POINT 259 if( !isdigit(z[1]) ) 260 #endif 261 { 262 *tokenType = TK_DOT; 263 return 1; 264 } 265 /* If the next character is a digit, this is a floating point 266 ** number that begins with ".". Fall thru into the next case */ 267 } 268 case '0': case '1': case '2': case '3': case '4': 269 case '5': case '6': case '7': case '8': case '9': { 270 *tokenType = TK_INTEGER; 271 for(i=0; isdigit(z[i]); i++){} 272 #ifndef SQLITE_OMIT_FLOATING_POINT 273 if( z[i]=='.' ){ 274 i++; 275 while( isdigit(z[i]) ){ i++; } 276 *tokenType = TK_FLOAT; 277 } 278 if( (z[i]=='e' || z[i]=='E') && 279 ( isdigit(z[i+1]) 280 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) 281 ) 282 ){ 283 i += 2; 284 while( isdigit(z[i]) ){ i++; } 285 *tokenType = TK_FLOAT; 286 } 287 #endif 288 while( IdChar(z[i]) ){ 289 *tokenType = TK_ILLEGAL; 290 i++; 291 } 292 return i; 293 } 294 case '[': { 295 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 296 *tokenType = TK_ID; 297 return i; 298 } 299 case '?': { 300 *tokenType = TK_VARIABLE; 301 for(i=1; isdigit(z[i]); i++){} 302 return i; 303 } 304 case '#': { 305 for(i=1; isdigit(z[i]); i++){} 306 if( i>1 ){ 307 /* Parameters of the form #NNN (where NNN is a number) are used 308 ** internally by sqlite3NestedParse. */ 309 *tokenType = TK_REGISTER; 310 return i; 311 } 312 /* Fall through into the next case if the '#' is not followed by 313 ** a digit. Try to match #AAAA where AAAA is a parameter name. */ 314 } 315 #ifndef SQLITE_OMIT_TCL_VARIABLE 316 case '$': 317 #endif 318 case '@': /* For compatibility with MS SQL Server */ 319 case ':': { 320 int n = 0; 321 *tokenType = TK_VARIABLE; 322 for(i=1; (c=z[i])!=0; i++){ 323 if( IdChar(c) ){ 324 n++; 325 #ifndef SQLITE_OMIT_TCL_VARIABLE 326 }else if( c=='(' && n>0 ){ 327 do{ 328 i++; 329 }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); 330 if( c==')' ){ 331 i++; 332 }else{ 333 *tokenType = TK_ILLEGAL; 334 } 335 break; 336 }else if( c==':' && z[i+1]==':' ){ 337 i++; 338 #endif 339 }else{ 340 break; 341 } 342 } 343 if( n==0 ) *tokenType = TK_ILLEGAL; 344 return i; 345 } 346 #ifndef SQLITE_OMIT_BLOB_LITERAL 347 case 'x': case 'X': { 348 if( (c=z[1])=='\'' || c=='"' ){ 349 int delim = c; 350 *tokenType = TK_BLOB; 351 for(i=2; (c=z[i])!=0; i++){ 352 if( c==delim ){ 353 if( i%2 ) *tokenType = TK_ILLEGAL; 354 break; 355 } 356 if( !isxdigit(c) ){ 357 *tokenType = TK_ILLEGAL; 358 return i; 359 } 360 } 361 if( c ) i++; 362 return i; 363 } 364 /* Otherwise fall through to the next case */ 365 } 366 #endif 367 default: { 368 if( !IdChar(*z) ){ 369 break; 370 } 371 for(i=1; IdChar(z[i]); i++){} 372 *tokenType = keywordCode((char*)z, i); 373 return i; 374 } 375 } 376 *tokenType = TK_ILLEGAL; 377 return 1; 378 } 379 int sqlite3GetToken(const unsigned char *z, int *tokenType){ 380 return getToken(z, tokenType); 381 } 382 383 /* 384 ** Run the parser on the given SQL string. The parser structure is 385 ** passed in. An SQLITE_ status code is returned. If an error occurs 386 ** and pzErrMsg!=NULL then an error message might be written into 387 ** memory obtained from malloc() and *pzErrMsg made to point to that 388 ** error message. Or maybe not. 389 */ 390 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 391 int nErr = 0; 392 int i; 393 void *pEngine; 394 int tokenType; 395 int lastTokenParsed = -1; 396 sqlite3 *db = pParse->db; 397 extern void *sqlite3ParserAlloc(void*(*)(size_t)); 398 extern void sqlite3ParserFree(void*, void(*)(void*)); 399 extern void sqlite3Parser(void*, int, Token, Parse*); 400 401 if( db->activeVdbeCnt==0 ){ 402 db->u1.isInterrupted = 0; 403 } 404 pParse->rc = SQLITE_OK; 405 i = 0; 406 pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3MallocX); 407 if( pEngine==0 ){ 408 return SQLITE_NOMEM; 409 } 410 assert( pParse->sLastToken.dyn==0 ); 411 assert( pParse->pNewTable==0 ); 412 assert( pParse->pNewTrigger==0 ); 413 assert( pParse->nVar==0 ); 414 assert( pParse->nVarExpr==0 ); 415 assert( pParse->nVarExprAlloc==0 ); 416 assert( pParse->apVarExpr==0 ); 417 pParse->zTail = pParse->zSql = zSql; 418 while( !sqlite3MallocFailed() && zSql[i]!=0 ){ 419 assert( i>=0 ); 420 pParse->sLastToken.z = (u8*)&zSql[i]; 421 assert( pParse->sLastToken.dyn==0 ); 422 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType); 423 i += pParse->sLastToken.n; 424 if( i>SQLITE_MAX_SQL_LENGTH ){ 425 pParse->rc = SQLITE_TOOBIG; 426 break; 427 } 428 switch( tokenType ){ 429 case TK_SPACE: 430 case TK_COMMENT: { 431 if( db->u1.isInterrupted ){ 432 pParse->rc = SQLITE_INTERRUPT; 433 sqlite3SetString(pzErrMsg, "interrupt", (char*)0); 434 goto abort_parse; 435 } 436 break; 437 } 438 case TK_ILLEGAL: { 439 if( pzErrMsg ){ 440 sqliteFree(*pzErrMsg); 441 *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"", 442 &pParse->sLastToken); 443 } 444 nErr++; 445 goto abort_parse; 446 } 447 case TK_SEMI: { 448 pParse->zTail = &zSql[i]; 449 /* Fall thru into the default case */ 450 } 451 default: { 452 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); 453 lastTokenParsed = tokenType; 454 if( pParse->rc!=SQLITE_OK ){ 455 goto abort_parse; 456 } 457 break; 458 } 459 } 460 } 461 abort_parse: 462 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 463 if( lastTokenParsed!=TK_SEMI ){ 464 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 465 pParse->zTail = &zSql[i]; 466 } 467 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 468 } 469 sqlite3ParserFree(pEngine, sqlite3FreeX); 470 if( sqlite3MallocFailed() ){ 471 pParse->rc = SQLITE_NOMEM; 472 } 473 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 474 sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0); 475 } 476 if( pParse->zErrMsg ){ 477 if( pzErrMsg && *pzErrMsg==0 ){ 478 *pzErrMsg = pParse->zErrMsg; 479 }else{ 480 sqliteFree(pParse->zErrMsg); 481 } 482 pParse->zErrMsg = 0; 483 if( !nErr ) nErr++; 484 } 485 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ 486 sqlite3VdbeDelete(pParse->pVdbe); 487 pParse->pVdbe = 0; 488 } 489 #ifndef SQLITE_OMIT_SHARED_CACHE 490 if( pParse->nested==0 ){ 491 sqliteFree(pParse->aTableLock); 492 pParse->aTableLock = 0; 493 pParse->nTableLock = 0; 494 } 495 #endif 496 497 if( !IN_DECLARE_VTAB ){ 498 /* If the pParse->declareVtab flag is set, do not delete any table 499 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) 500 ** will take responsibility for freeing the Table structure. 501 */ 502 sqlite3DeleteTable(pParse->pNewTable); 503 } 504 505 sqlite3DeleteTrigger(pParse->pNewTrigger); 506 sqliteFree(pParse->apVarExpr); 507 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ 508 pParse->rc = SQLITE_ERROR; 509 } 510 return nErr; 511 } 512