1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** An tokenizer for SQL 13 ** 14 ** This file contains C code that splits an SQL input string up into 15 ** individual tokens and sends those tokens one-by-one over to the 16 ** parser for analysis. 17 ** 18 ** $Id: tokenize.c,v 1.163 2009/07/03 22:54:37 drh Exp $ 19 */ 20 #include "sqliteInt.h" 21 #include <stdlib.h> 22 23 /* 24 ** The charMap() macro maps alphabetic characters into their 25 ** lower-case ASCII equivalent. On ASCII machines, this is just 26 ** an upper-to-lower case map. On EBCDIC machines we also need 27 ** to adjust the encoding. Only alphabetic characters and underscores 28 ** need to be translated. 29 */ 30 #ifdef SQLITE_ASCII 31 # define charMap(X) sqlite3UpperToLower[(unsigned char)X] 32 #endif 33 #ifdef SQLITE_EBCDIC 34 # define charMap(X) ebcdicToAscii[(unsigned char)X] 35 const unsigned char ebcdicToAscii[] = { 36 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ 44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 45 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ 46 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ 47 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ 48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 49 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ 50 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ 51 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ 52 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ 53 }; 54 #endif 55 56 /* 57 ** The sqlite3KeywordCode function looks up an identifier to determine if 58 ** it is a keyword. If it is a keyword, the token code of that keyword is 59 ** returned. If the input is not a keyword, TK_ID is returned. 60 ** 61 ** The implementation of this routine was generated by a program, 62 ** mkkeywordhash.h, located in the tool subdirectory of the distribution. 63 ** The output of the mkkeywordhash.c program is written into a file 64 ** named keywordhash.h and then included into this source file by 65 ** the #include below. 66 */ 67 #include "keywordhash.h" 68 69 70 /* 71 ** If X is a character that can be used in an identifier then 72 ** IdChar(X) will be true. Otherwise it is false. 73 ** 74 ** For ASCII, any character with the high-order bit set is 75 ** allowed in an identifier. For 7-bit characters, 76 ** sqlite3IsIdChar[X] must be 1. 77 ** 78 ** For EBCDIC, the rules are more complex but have the same 79 ** end result. 80 ** 81 ** Ticket #1066. the SQL standard does not allow '$' in the 82 ** middle of identfiers. But many SQL implementations do. 83 ** SQLite will allow '$' in identifiers for compatibility. 84 ** But the feature is undocumented. 85 */ 86 #ifdef SQLITE_ASCII 87 const char sqlite3IsAsciiIdChar[] = { 88 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 89 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ 91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ 92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ 93 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ 94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ 95 }; 96 #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20])) 97 #endif 98 #ifdef SQLITE_EBCDIC 99 const char sqlite3IsEbcdicIdChar[] = { 100 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 101 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ 102 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ 103 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ 104 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 105 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ 106 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ 107 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ 108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 109 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ 110 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ 111 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ 112 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ 113 }; 114 #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) 115 #endif 116 117 118 /* 119 ** Return the length of the token that begins at z[0]. 120 ** Store the token type in *tokenType before returning. 121 */ 122 int sqlite3GetToken(const unsigned char *z, int *tokenType){ 123 int i, c; 124 switch( *z ){ 125 case ' ': case '\t': case '\n': case '\f': case '\r': { 126 testcase( z[0]==' ' ); 127 testcase( z[0]=='\t' ); 128 testcase( z[0]=='\n' ); 129 testcase( z[0]=='\f' ); 130 testcase( z[0]=='\r' ); 131 for(i=1; sqlite3Isspace(z[i]); i++){} 132 *tokenType = TK_SPACE; 133 return i; 134 } 135 case '-': { 136 if( z[1]=='-' ){ 137 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 138 *tokenType = TK_SPACE; 139 return i; 140 } 141 *tokenType = TK_MINUS; 142 return 1; 143 } 144 case '(': { 145 *tokenType = TK_LP; 146 return 1; 147 } 148 case ')': { 149 *tokenType = TK_RP; 150 return 1; 151 } 152 case ';': { 153 *tokenType = TK_SEMI; 154 return 1; 155 } 156 case '+': { 157 *tokenType = TK_PLUS; 158 return 1; 159 } 160 case '*': { 161 *tokenType = TK_STAR; 162 return 1; 163 } 164 case '/': { 165 if( z[1]!='*' || z[2]==0 ){ 166 *tokenType = TK_SLASH; 167 return 1; 168 } 169 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 170 if( c ) i++; 171 *tokenType = TK_SPACE; 172 return i; 173 } 174 case '%': { 175 *tokenType = TK_REM; 176 return 1; 177 } 178 case '=': { 179 *tokenType = TK_EQ; 180 return 1 + (z[1]=='='); 181 } 182 case '<': { 183 if( (c=z[1])=='=' ){ 184 *tokenType = TK_LE; 185 return 2; 186 }else if( c=='>' ){ 187 *tokenType = TK_NE; 188 return 2; 189 }else if( c=='<' ){ 190 *tokenType = TK_LSHIFT; 191 return 2; 192 }else{ 193 *tokenType = TK_LT; 194 return 1; 195 } 196 } 197 case '>': { 198 if( (c=z[1])=='=' ){ 199 *tokenType = TK_GE; 200 return 2; 201 }else if( c=='>' ){ 202 *tokenType = TK_RSHIFT; 203 return 2; 204 }else{ 205 *tokenType = TK_GT; 206 return 1; 207 } 208 } 209 case '!': { 210 if( z[1]!='=' ){ 211 *tokenType = TK_ILLEGAL; 212 return 2; 213 }else{ 214 *tokenType = TK_NE; 215 return 2; 216 } 217 } 218 case '|': { 219 if( z[1]!='|' ){ 220 *tokenType = TK_BITOR; 221 return 1; 222 }else{ 223 *tokenType = TK_CONCAT; 224 return 2; 225 } 226 } 227 case ',': { 228 *tokenType = TK_COMMA; 229 return 1; 230 } 231 case '&': { 232 *tokenType = TK_BITAND; 233 return 1; 234 } 235 case '~': { 236 *tokenType = TK_BITNOT; 237 return 1; 238 } 239 case '`': 240 case '\'': 241 case '"': { 242 int delim = z[0]; 243 testcase( delim=='`' ); 244 testcase( delim=='\'' ); 245 testcase( delim=='"' ); 246 for(i=1; (c=z[i])!=0; i++){ 247 if( c==delim ){ 248 if( z[i+1]==delim ){ 249 i++; 250 }else{ 251 break; 252 } 253 } 254 } 255 if( c=='\'' ){ 256 *tokenType = TK_STRING; 257 return i+1; 258 }else if( c!=0 ){ 259 *tokenType = TK_ID; 260 return i+1; 261 }else{ 262 *tokenType = TK_ILLEGAL; 263 return i; 264 } 265 } 266 case '.': { 267 #ifndef SQLITE_OMIT_FLOATING_POINT 268 if( !sqlite3Isdigit(z[1]) ) 269 #endif 270 { 271 *tokenType = TK_DOT; 272 return 1; 273 } 274 /* If the next character is a digit, this is a floating point 275 ** number that begins with ".". Fall thru into the next case */ 276 } 277 case '0': case '1': case '2': case '3': case '4': 278 case '5': case '6': case '7': case '8': case '9': { 279 testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); 280 testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); 281 testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); 282 testcase( z[0]=='9' ); 283 *tokenType = TK_INTEGER; 284 for(i=0; sqlite3Isdigit(z[i]); i++){} 285 #ifndef SQLITE_OMIT_FLOATING_POINT 286 if( z[i]=='.' ){ 287 i++; 288 while( sqlite3Isdigit(z[i]) ){ i++; } 289 *tokenType = TK_FLOAT; 290 } 291 if( (z[i]=='e' || z[i]=='E') && 292 ( sqlite3Isdigit(z[i+1]) 293 || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) 294 ) 295 ){ 296 i += 2; 297 while( sqlite3Isdigit(z[i]) ){ i++; } 298 *tokenType = TK_FLOAT; 299 } 300 #endif 301 while( IdChar(z[i]) ){ 302 *tokenType = TK_ILLEGAL; 303 i++; 304 } 305 return i; 306 } 307 case '[': { 308 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 309 *tokenType = c==']' ? TK_ID : TK_ILLEGAL; 310 return i; 311 } 312 case '?': { 313 *tokenType = TK_VARIABLE; 314 for(i=1; sqlite3Isdigit(z[i]); i++){} 315 return i; 316 } 317 case '#': { 318 for(i=1; sqlite3Isdigit(z[i]); i++){} 319 if( i>1 ){ 320 /* Parameters of the form #NNN (where NNN is a number) are used 321 ** internally by sqlite3NestedParse. */ 322 *tokenType = TK_REGISTER; 323 return i; 324 } 325 /* Fall through into the next case if the '#' is not followed by 326 ** a digit. Try to match #AAAA where AAAA is a parameter name. */ 327 } 328 #ifndef SQLITE_OMIT_TCL_VARIABLE 329 case '$': 330 #endif 331 case '@': /* For compatibility with MS SQL Server */ 332 case ':': { 333 int n = 0; 334 testcase( z[0]=='$' ); testcase( z[0]=='@' ); testcase( z[0]==':' ); 335 *tokenType = TK_VARIABLE; 336 for(i=1; (c=z[i])!=0; i++){ 337 if( IdChar(c) ){ 338 n++; 339 #ifndef SQLITE_OMIT_TCL_VARIABLE 340 }else if( c=='(' && n>0 ){ 341 do{ 342 i++; 343 }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); 344 if( c==')' ){ 345 i++; 346 }else{ 347 *tokenType = TK_ILLEGAL; 348 } 349 break; 350 }else if( c==':' && z[i+1]==':' ){ 351 i++; 352 #endif 353 }else{ 354 break; 355 } 356 } 357 if( n==0 ) *tokenType = TK_ILLEGAL; 358 return i; 359 } 360 #ifndef SQLITE_OMIT_BLOB_LITERAL 361 case 'x': case 'X': { 362 testcase( z[0]=='x' ); testcase( z[0]=='X' ); 363 if( z[1]=='\'' ){ 364 *tokenType = TK_BLOB; 365 for(i=2; (c=z[i])!=0 && c!='\''; i++){ 366 if( !sqlite3Isxdigit(c) ){ 367 *tokenType = TK_ILLEGAL; 368 } 369 } 370 if( i%2 || !c ) *tokenType = TK_ILLEGAL; 371 if( c ) i++; 372 return i; 373 } 374 /* Otherwise fall through to the next case */ 375 } 376 #endif 377 default: { 378 if( !IdChar(*z) ){ 379 break; 380 } 381 for(i=1; IdChar(z[i]); i++){} 382 *tokenType = keywordCode((char*)z, i); 383 return i; 384 } 385 } 386 *tokenType = TK_ILLEGAL; 387 return 1; 388 } 389 390 /* 391 ** Run the parser on the given SQL string. The parser structure is 392 ** passed in. An SQLITE_ status code is returned. If an error occurs 393 ** then an and attempt is made to write an error message into 394 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that 395 ** error message. 396 */ 397 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 398 int nErr = 0; /* Number of errors encountered */ 399 int i; /* Loop counter */ 400 void *pEngine; /* The LEMON-generated LALR(1) parser */ 401 int tokenType; /* type of the next token */ 402 int lastTokenParsed = -1; /* type of the previous token */ 403 u8 enableLookaside; /* Saved value of db->lookaside.bEnabled */ 404 sqlite3 *db = pParse->db; /* The database connection */ 405 int mxSqlLen; /* Max length of an SQL string */ 406 407 408 mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; 409 if( db->activeVdbeCnt==0 ){ 410 db->u1.isInterrupted = 0; 411 } 412 pParse->rc = SQLITE_OK; 413 pParse->zTail = zSql; 414 i = 0; 415 assert( pzErrMsg!=0 ); 416 pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc); 417 if( pEngine==0 ){ 418 db->mallocFailed = 1; 419 return SQLITE_NOMEM; 420 } 421 assert( pParse->pNewTable==0 ); 422 assert( pParse->pNewTrigger==0 ); 423 assert( pParse->nVar==0 ); 424 assert( pParse->nVarExpr==0 ); 425 assert( pParse->nVarExprAlloc==0 ); 426 assert( pParse->apVarExpr==0 ); 427 enableLookaside = db->lookaside.bEnabled; 428 if( db->lookaside.pStart ) db->lookaside.bEnabled = 1; 429 while( !db->mallocFailed && zSql[i]!=0 ){ 430 assert( i>=0 ); 431 pParse->sLastToken.z = &zSql[i]; 432 pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); 433 i += pParse->sLastToken.n; 434 if( i>mxSqlLen ){ 435 pParse->rc = SQLITE_TOOBIG; 436 break; 437 } 438 switch( tokenType ){ 439 case TK_SPACE: { 440 if( db->u1.isInterrupted ){ 441 sqlite3ErrorMsg(pParse, "interrupt"); 442 pParse->rc = SQLITE_INTERRUPT; 443 goto abort_parse; 444 } 445 break; 446 } 447 case TK_ILLEGAL: { 448 sqlite3DbFree(db, *pzErrMsg); 449 *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", 450 &pParse->sLastToken); 451 nErr++; 452 goto abort_parse; 453 } 454 case TK_SEMI: { 455 pParse->zTail = &zSql[i]; 456 /* Fall thru into the default case */ 457 } 458 default: { 459 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); 460 lastTokenParsed = tokenType; 461 if( pParse->rc!=SQLITE_OK ){ 462 goto abort_parse; 463 } 464 break; 465 } 466 } 467 } 468 abort_parse: 469 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 470 if( lastTokenParsed!=TK_SEMI ){ 471 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 472 pParse->zTail = &zSql[i]; 473 } 474 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 475 } 476 #ifdef YYTRACKMAXSTACKDEPTH 477 sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, 478 sqlite3ParserStackPeak(pEngine) 479 ); 480 #endif /* YYDEBUG */ 481 sqlite3ParserFree(pEngine, sqlite3_free); 482 db->lookaside.bEnabled = enableLookaside; 483 if( db->mallocFailed ){ 484 pParse->rc = SQLITE_NOMEM; 485 } 486 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 487 sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); 488 } 489 assert( pzErrMsg!=0 ); 490 if( pParse->zErrMsg ){ 491 *pzErrMsg = pParse->zErrMsg; 492 pParse->zErrMsg = 0; 493 nErr++; 494 } 495 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ 496 sqlite3VdbeDelete(pParse->pVdbe); 497 pParse->pVdbe = 0; 498 } 499 #ifndef SQLITE_OMIT_SHARED_CACHE 500 if( pParse->nested==0 ){ 501 sqlite3DbFree(db, pParse->aTableLock); 502 pParse->aTableLock = 0; 503 pParse->nTableLock = 0; 504 } 505 #endif 506 #ifndef SQLITE_OMIT_VIRTUALTABLE 507 sqlite3DbFree(db, pParse->apVtabLock); 508 #endif 509 510 if( !IN_DECLARE_VTAB ){ 511 /* If the pParse->declareVtab flag is set, do not delete any table 512 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) 513 ** will take responsibility for freeing the Table structure. 514 */ 515 sqlite3DeleteTable(pParse->pNewTable); 516 } 517 518 sqlite3DeleteTrigger(db, pParse->pNewTrigger); 519 sqlite3DbFree(db, pParse->apVarExpr); 520 sqlite3DbFree(db, pParse->aAlias); 521 while( pParse->pAinc ){ 522 AutoincInfo *p = pParse->pAinc; 523 pParse->pAinc = p->pNext; 524 sqlite3DbFree(db, p); 525 } 526 while( pParse->pZombieTab ){ 527 Table *p = pParse->pZombieTab; 528 pParse->pZombieTab = p->pNextZombie; 529 sqlite3DeleteTable(p); 530 } 531 if( nErr>0 && pParse->rc==SQLITE_OK ){ 532 pParse->rc = SQLITE_ERROR; 533 } 534 return nErr; 535 } 536