1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** An tokenizer for SQL 13 ** 14 ** This file contains C code that splits an SQL input string up into 15 ** individual tokens and sends those tokens one-by-one over to the 16 ** parser for analysis. 17 */ 18 #include "sqliteInt.h" 19 #include <stdlib.h> 20 21 /* 22 ** The charMap() macro maps alphabetic characters into their 23 ** lower-case ASCII equivalent. On ASCII machines, this is just 24 ** an upper-to-lower case map. On EBCDIC machines we also need 25 ** to adjust the encoding. Only alphabetic characters and underscores 26 ** need to be translated. 27 */ 28 #ifdef SQLITE_ASCII 29 # define charMap(X) sqlite3UpperToLower[(unsigned char)X] 30 #endif 31 #ifdef SQLITE_EBCDIC 32 # define charMap(X) ebcdicToAscii[(unsigned char)X] 33 const unsigned char ebcdicToAscii[] = { 34 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 35 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 43 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ 44 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ 45 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ 46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 47 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ 48 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ 49 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ 50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ 51 }; 52 #endif 53 54 /* 55 ** The sqlite3KeywordCode function looks up an identifier to determine if 56 ** it is a keyword. If it is a keyword, the token code of that keyword is 57 ** returned. If the input is not a keyword, TK_ID is returned. 58 ** 59 ** The implementation of this routine was generated by a program, 60 ** mkkeywordhash.h, located in the tool subdirectory of the distribution. 61 ** The output of the mkkeywordhash.c program is written into a file 62 ** named keywordhash.h and then included into this source file by 63 ** the #include below. 64 */ 65 #include "keywordhash.h" 66 67 68 /* 69 ** If X is a character that can be used in an identifier then 70 ** IdChar(X) will be true. Otherwise it is false. 71 ** 72 ** For ASCII, any character with the high-order bit set is 73 ** allowed in an identifier. For 7-bit characters, 74 ** sqlite3IsIdChar[X] must be 1. 75 ** 76 ** For EBCDIC, the rules are more complex but have the same 77 ** end result. 78 ** 79 ** Ticket #1066. the SQL standard does not allow '$' in the 80 ** middle of identifiers. But many SQL implementations do. 81 ** SQLite will allow '$' in identifiers for compatibility. 82 ** But the feature is undocumented. 83 */ 84 #ifdef SQLITE_ASCII 85 #define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) 86 #endif 87 #ifdef SQLITE_EBCDIC 88 const char sqlite3IsEbcdicIdChar[] = { 89 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 90 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ 91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ 92 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ 93 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 94 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ 95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ 96 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ 97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 98 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ 99 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ 100 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ 101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ 102 }; 103 #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) 104 #endif 105 106 /* Make the IdChar function accessible from ctime.c */ 107 #ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS 108 int sqlite3IsIdChar(u8 c){ return IdChar(c); } 109 #endif 110 111 112 /* 113 ** Return the length of the token that begins at z[0]. 114 ** Store the token type in *tokenType before returning. 115 */ 116 int sqlite3GetToken(const unsigned char *z, int *tokenType){ 117 int i, c; 118 switch( *z ){ 119 case ' ': case '\t': case '\n': case '\f': case '\r': { 120 testcase( z[0]==' ' ); 121 testcase( z[0]=='\t' ); 122 testcase( z[0]=='\n' ); 123 testcase( z[0]=='\f' ); 124 testcase( z[0]=='\r' ); 125 for(i=1; sqlite3Isspace(z[i]); i++){} 126 *tokenType = TK_SPACE; 127 return i; 128 } 129 case '-': { 130 if( z[1]=='-' ){ 131 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 132 *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ 133 return i; 134 } 135 *tokenType = TK_MINUS; 136 return 1; 137 } 138 case '(': { 139 *tokenType = TK_LP; 140 return 1; 141 } 142 case ')': { 143 *tokenType = TK_RP; 144 return 1; 145 } 146 case ';': { 147 *tokenType = TK_SEMI; 148 return 1; 149 } 150 case '+': { 151 *tokenType = TK_PLUS; 152 return 1; 153 } 154 case '*': { 155 *tokenType = TK_STAR; 156 return 1; 157 } 158 case '/': { 159 if( z[1]!='*' || z[2]==0 ){ 160 *tokenType = TK_SLASH; 161 return 1; 162 } 163 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 164 if( c ) i++; 165 *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ 166 return i; 167 } 168 case '%': { 169 *tokenType = TK_REM; 170 return 1; 171 } 172 case '=': { 173 *tokenType = TK_EQ; 174 return 1 + (z[1]=='='); 175 } 176 case '<': { 177 if( (c=z[1])=='=' ){ 178 *tokenType = TK_LE; 179 return 2; 180 }else if( c=='>' ){ 181 *tokenType = TK_NE; 182 return 2; 183 }else if( c=='<' ){ 184 *tokenType = TK_LSHIFT; 185 return 2; 186 }else{ 187 *tokenType = TK_LT; 188 return 1; 189 } 190 } 191 case '>': { 192 if( (c=z[1])=='=' ){ 193 *tokenType = TK_GE; 194 return 2; 195 }else if( c=='>' ){ 196 *tokenType = TK_RSHIFT; 197 return 2; 198 }else{ 199 *tokenType = TK_GT; 200 return 1; 201 } 202 } 203 case '!': { 204 if( z[1]!='=' ){ 205 *tokenType = TK_ILLEGAL; 206 return 2; 207 }else{ 208 *tokenType = TK_NE; 209 return 2; 210 } 211 } 212 case '|': { 213 if( z[1]!='|' ){ 214 *tokenType = TK_BITOR; 215 return 1; 216 }else{ 217 *tokenType = TK_CONCAT; 218 return 2; 219 } 220 } 221 case ',': { 222 *tokenType = TK_COMMA; 223 return 1; 224 } 225 case '&': { 226 *tokenType = TK_BITAND; 227 return 1; 228 } 229 case '~': { 230 *tokenType = TK_BITNOT; 231 return 1; 232 } 233 case '`': 234 case '\'': 235 case '"': { 236 int delim = z[0]; 237 testcase( delim=='`' ); 238 testcase( delim=='\'' ); 239 testcase( delim=='"' ); 240 for(i=1; (c=z[i])!=0; i++){ 241 if( c==delim ){ 242 if( z[i+1]==delim ){ 243 i++; 244 }else{ 245 break; 246 } 247 } 248 } 249 if( c=='\'' ){ 250 *tokenType = TK_STRING; 251 return i+1; 252 }else if( c!=0 ){ 253 *tokenType = TK_ID; 254 return i+1; 255 }else{ 256 *tokenType = TK_ILLEGAL; 257 return i; 258 } 259 } 260 case '.': { 261 #ifndef SQLITE_OMIT_FLOATING_POINT 262 if( !sqlite3Isdigit(z[1]) ) 263 #endif 264 { 265 *tokenType = TK_DOT; 266 return 1; 267 } 268 /* If the next character is a digit, this is a floating point 269 ** number that begins with ".". Fall thru into the next case */ 270 } 271 case '0': case '1': case '2': case '3': case '4': 272 case '5': case '6': case '7': case '8': case '9': { 273 testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); 274 testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); 275 testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); 276 testcase( z[0]=='9' ); 277 *tokenType = TK_INTEGER; 278 #ifndef SQLITE_OMIT_HEX_INTEGER 279 if( z[0]=='0' && (z[1]=='x' || z[1]=='X') && sqlite3Isxdigit(z[2]) ){ 280 for(i=3; sqlite3Isxdigit(z[i]); i++){} 281 return i; 282 } 283 #endif 284 for(i=0; sqlite3Isdigit(z[i]); i++){} 285 #ifndef SQLITE_OMIT_FLOATING_POINT 286 if( z[i]=='.' ){ 287 i++; 288 while( sqlite3Isdigit(z[i]) ){ i++; } 289 *tokenType = TK_FLOAT; 290 } 291 if( (z[i]=='e' || z[i]=='E') && 292 ( sqlite3Isdigit(z[i+1]) 293 || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) 294 ) 295 ){ 296 i += 2; 297 while( sqlite3Isdigit(z[i]) ){ i++; } 298 *tokenType = TK_FLOAT; 299 } 300 #endif 301 while( IdChar(z[i]) ){ 302 *tokenType = TK_ILLEGAL; 303 i++; 304 } 305 return i; 306 } 307 case '[': { 308 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 309 *tokenType = c==']' ? TK_ID : TK_ILLEGAL; 310 return i; 311 } 312 case '?': { 313 *tokenType = TK_VARIABLE; 314 for(i=1; sqlite3Isdigit(z[i]); i++){} 315 return i; 316 } 317 #ifndef SQLITE_OMIT_TCL_VARIABLE 318 case '$': 319 #endif 320 case '@': /* For compatibility with MS SQL Server */ 321 case '#': 322 case ':': { 323 int n = 0; 324 testcase( z[0]=='$' ); testcase( z[0]=='@' ); 325 testcase( z[0]==':' ); testcase( z[0]=='#' ); 326 *tokenType = TK_VARIABLE; 327 for(i=1; (c=z[i])!=0; i++){ 328 if( IdChar(c) ){ 329 n++; 330 #ifndef SQLITE_OMIT_TCL_VARIABLE 331 }else if( c=='(' && n>0 ){ 332 do{ 333 i++; 334 }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); 335 if( c==')' ){ 336 i++; 337 }else{ 338 *tokenType = TK_ILLEGAL; 339 } 340 break; 341 }else if( c==':' && z[i+1]==':' ){ 342 i++; 343 #endif 344 }else{ 345 break; 346 } 347 } 348 if( n==0 ) *tokenType = TK_ILLEGAL; 349 return i; 350 } 351 #ifndef SQLITE_OMIT_BLOB_LITERAL 352 case 'x': case 'X': { 353 testcase( z[0]=='x' ); testcase( z[0]=='X' ); 354 if( z[1]=='\'' ){ 355 *tokenType = TK_BLOB; 356 for(i=2; sqlite3Isxdigit(z[i]); i++){} 357 if( z[i]!='\'' || i%2 ){ 358 *tokenType = TK_ILLEGAL; 359 while( z[i] && z[i]!='\'' ){ i++; } 360 } 361 if( z[i] ) i++; 362 return i; 363 } 364 /* Otherwise fall through to the next case */ 365 } 366 #endif 367 default: { 368 if( !IdChar(*z) ){ 369 break; 370 } 371 for(i=1; IdChar(z[i]); i++){} 372 *tokenType = keywordCode((char*)z, i); 373 return i; 374 } 375 } 376 *tokenType = TK_ILLEGAL; 377 return 1; 378 } 379 380 /* 381 ** Run the parser on the given SQL string. The parser structure is 382 ** passed in. An SQLITE_ status code is returned. If an error occurs 383 ** then an and attempt is made to write an error message into 384 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that 385 ** error message. 386 */ 387 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 388 int nErr = 0; /* Number of errors encountered */ 389 int i; /* Loop counter */ 390 void *pEngine; /* The LEMON-generated LALR(1) parser */ 391 int tokenType; /* type of the next token */ 392 int lastTokenParsed = -1; /* type of the previous token */ 393 u8 enableLookaside; /* Saved value of db->lookaside.bEnabled */ 394 sqlite3 *db = pParse->db; /* The database connection */ 395 int mxSqlLen; /* Max length of an SQL string */ 396 397 assert( zSql!=0 ); 398 mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; 399 if( db->nVdbeActive==0 ){ 400 db->u1.isInterrupted = 0; 401 } 402 pParse->rc = SQLITE_OK; 403 pParse->zTail = zSql; 404 i = 0; 405 assert( pzErrMsg!=0 ); 406 /* sqlite3ParserTrace(stdout, "parser: "); */ 407 pEngine = sqlite3ParserAlloc(sqlite3Malloc); 408 if( pEngine==0 ){ 409 db->mallocFailed = 1; 410 return SQLITE_NOMEM; 411 } 412 assert( pParse->pNewTable==0 ); 413 assert( pParse->pNewTrigger==0 ); 414 assert( pParse->nVar==0 ); 415 assert( pParse->nzVar==0 ); 416 assert( pParse->azVar==0 ); 417 enableLookaside = db->lookaside.bEnabled; 418 if( db->lookaside.pStart ) db->lookaside.bEnabled = 1; 419 while( !db->mallocFailed && zSql[i]!=0 ){ 420 assert( i>=0 ); 421 pParse->sLastToken.z = &zSql[i]; 422 pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); 423 i += pParse->sLastToken.n; 424 if( i>mxSqlLen ){ 425 pParse->rc = SQLITE_TOOBIG; 426 break; 427 } 428 switch( tokenType ){ 429 case TK_SPACE: { 430 if( db->u1.isInterrupted ){ 431 sqlite3ErrorMsg(pParse, "interrupt"); 432 pParse->rc = SQLITE_INTERRUPT; 433 goto abort_parse; 434 } 435 break; 436 } 437 case TK_ILLEGAL: { 438 sqlite3ErrorMsg(pParse, "unrecognized token: \"%T\"", 439 &pParse->sLastToken); 440 goto abort_parse; 441 } 442 case TK_SEMI: { 443 pParse->zTail = &zSql[i]; 444 /* Fall thru into the default case */ 445 } 446 default: { 447 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); 448 lastTokenParsed = tokenType; 449 if( pParse->rc!=SQLITE_OK ){ 450 goto abort_parse; 451 } 452 break; 453 } 454 } 455 } 456 abort_parse: 457 assert( nErr==0 ); 458 if( pParse->rc==SQLITE_OK && db->mallocFailed==0 ){ 459 assert( zSql[i]==0 ); 460 if( lastTokenParsed!=TK_SEMI ){ 461 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 462 pParse->zTail = &zSql[i]; 463 } 464 if( pParse->rc==SQLITE_OK && db->mallocFailed==0 ){ 465 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 466 } 467 } 468 #ifdef YYTRACKMAXSTACKDEPTH 469 sqlite3_mutex_enter(sqlite3MallocMutex()); 470 sqlite3StatusHighwater(SQLITE_STATUS_PARSER_STACK, 471 sqlite3ParserStackPeak(pEngine) 472 ); 473 sqlite3_mutex_leave(sqlite3MallocMutex()); 474 #endif /* YYDEBUG */ 475 sqlite3ParserFree(pEngine, sqlite3_free); 476 db->lookaside.bEnabled = enableLookaside; 477 if( db->mallocFailed ){ 478 pParse->rc = SQLITE_NOMEM; 479 } 480 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 481 pParse->zErrMsg = sqlite3MPrintf(db, "%s", sqlite3ErrStr(pParse->rc)); 482 } 483 assert( pzErrMsg!=0 ); 484 if( pParse->zErrMsg ){ 485 *pzErrMsg = pParse->zErrMsg; 486 sqlite3_log(pParse->rc, "%s", *pzErrMsg); 487 pParse->zErrMsg = 0; 488 nErr++; 489 } 490 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ 491 sqlite3VdbeDelete(pParse->pVdbe); 492 pParse->pVdbe = 0; 493 } 494 #ifndef SQLITE_OMIT_SHARED_CACHE 495 if( pParse->nested==0 ){ 496 sqlite3DbFree(db, pParse->aTableLock); 497 pParse->aTableLock = 0; 498 pParse->nTableLock = 0; 499 } 500 #endif 501 #ifndef SQLITE_OMIT_VIRTUALTABLE 502 sqlite3_free(pParse->apVtabLock); 503 #endif 504 505 if( !IN_DECLARE_VTAB ){ 506 /* If the pParse->declareVtab flag is set, do not delete any table 507 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) 508 ** will take responsibility for freeing the Table structure. 509 */ 510 sqlite3DeleteTable(db, pParse->pNewTable); 511 } 512 513 if( pParse->bFreeWith ) sqlite3WithDelete(db, pParse->pWith); 514 sqlite3DeleteTrigger(db, pParse->pNewTrigger); 515 for(i=pParse->nzVar-1; i>=0; i--) sqlite3DbFree(db, pParse->azVar[i]); 516 sqlite3DbFree(db, pParse->azVar); 517 while( pParse->pAinc ){ 518 AutoincInfo *p = pParse->pAinc; 519 pParse->pAinc = p->pNext; 520 sqlite3DbFree(db, p); 521 } 522 while( pParse->pZombieTab ){ 523 Table *p = pParse->pZombieTab; 524 pParse->pZombieTab = p->pNextZombie; 525 sqlite3DeleteTable(db, p); 526 } 527 assert( nErr==0 || pParse->rc!=SQLITE_OK ); 528 return nErr; 529 } 530