1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** An tokenizer for SQL 13 ** 14 ** This file contains C code that splits an SQL input string up into 15 ** individual tokens and sends those tokens one-by-one over to the 16 ** parser for analysis. 17 */ 18 #include "sqliteInt.h" 19 #include <stdlib.h> 20 21 /* 22 ** The charMap() macro maps alphabetic characters into their 23 ** lower-case ASCII equivalent. On ASCII machines, this is just 24 ** an upper-to-lower case map. On EBCDIC machines we also need 25 ** to adjust the encoding. Only alphabetic characters and underscores 26 ** need to be translated. 27 */ 28 #ifdef SQLITE_ASCII 29 # define charMap(X) sqlite3UpperToLower[(unsigned char)X] 30 #endif 31 #ifdef SQLITE_EBCDIC 32 # define charMap(X) ebcdicToAscii[(unsigned char)X] 33 const unsigned char ebcdicToAscii[] = { 34 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 35 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 36 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 37 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 43 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ 44 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ 45 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ 46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 47 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ 48 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ 49 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ 50 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ 51 }; 52 #endif 53 54 /* 55 ** The sqlite3KeywordCode function looks up an identifier to determine if 56 ** it is a keyword. If it is a keyword, the token code of that keyword is 57 ** returned. If the input is not a keyword, TK_ID is returned. 58 ** 59 ** The implementation of this routine was generated by a program, 60 ** mkkeywordhash.h, located in the tool subdirectory of the distribution. 61 ** The output of the mkkeywordhash.c program is written into a file 62 ** named keywordhash.h and then included into this source file by 63 ** the #include below. 64 */ 65 #include "keywordhash.h" 66 67 68 /* 69 ** If X is a character that can be used in an identifier then 70 ** IdChar(X) will be true. Otherwise it is false. 71 ** 72 ** For ASCII, any character with the high-order bit set is 73 ** allowed in an identifier. For 7-bit characters, 74 ** sqlite3IsIdChar[X] must be 1. 75 ** 76 ** For EBCDIC, the rules are more complex but have the same 77 ** end result. 78 ** 79 ** Ticket #1066. the SQL standard does not allow '$' in the 80 ** middle of identfiers. But many SQL implementations do. 81 ** SQLite will allow '$' in identifiers for compatibility. 82 ** But the feature is undocumented. 83 */ 84 #ifdef SQLITE_ASCII 85 #define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) 86 #endif 87 #ifdef SQLITE_EBCDIC 88 const char sqlite3IsEbcdicIdChar[] = { 89 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 90 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ 91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ 92 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ 93 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 94 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ 95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ 96 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ 97 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 98 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ 99 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ 100 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ 101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ 102 }; 103 #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) 104 #endif 105 106 107 /* 108 ** Return the length of the token that begins at z[0]. 109 ** Store the token type in *tokenType before returning. 110 */ 111 int sqlite3GetToken(const unsigned char *z, int *tokenType){ 112 int i, c; 113 switch( *z ){ 114 case ' ': case '\t': case '\n': case '\f': case '\r': { 115 testcase( z[0]==' ' ); 116 testcase( z[0]=='\t' ); 117 testcase( z[0]=='\n' ); 118 testcase( z[0]=='\f' ); 119 testcase( z[0]=='\r' ); 120 for(i=1; sqlite3Isspace(z[i]); i++){} 121 *tokenType = TK_SPACE; 122 return i; 123 } 124 case '-': { 125 if( z[1]=='-' ){ 126 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 127 *tokenType = TK_SPACE; 128 return i; 129 } 130 *tokenType = TK_MINUS; 131 return 1; 132 } 133 case '(': { 134 *tokenType = TK_LP; 135 return 1; 136 } 137 case ')': { 138 *tokenType = TK_RP; 139 return 1; 140 } 141 case ';': { 142 *tokenType = TK_SEMI; 143 return 1; 144 } 145 case '+': { 146 *tokenType = TK_PLUS; 147 return 1; 148 } 149 case '*': { 150 *tokenType = TK_STAR; 151 return 1; 152 } 153 case '/': { 154 if( z[1]!='*' || z[2]==0 ){ 155 *tokenType = TK_SLASH; 156 return 1; 157 } 158 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 159 if( c ) i++; 160 *tokenType = TK_SPACE; 161 return i; 162 } 163 case '%': { 164 *tokenType = TK_REM; 165 return 1; 166 } 167 case '=': { 168 *tokenType = TK_EQ; 169 return 1 + (z[1]=='='); 170 } 171 case '<': { 172 if( (c=z[1])=='=' ){ 173 *tokenType = TK_LE; 174 return 2; 175 }else if( c=='>' ){ 176 *tokenType = TK_NE; 177 return 2; 178 }else if( c=='<' ){ 179 *tokenType = TK_LSHIFT; 180 return 2; 181 }else{ 182 *tokenType = TK_LT; 183 return 1; 184 } 185 } 186 case '>': { 187 if( (c=z[1])=='=' ){ 188 *tokenType = TK_GE; 189 return 2; 190 }else if( c=='>' ){ 191 *tokenType = TK_RSHIFT; 192 return 2; 193 }else{ 194 *tokenType = TK_GT; 195 return 1; 196 } 197 } 198 case '!': { 199 if( z[1]!='=' ){ 200 *tokenType = TK_ILLEGAL; 201 return 2; 202 }else{ 203 *tokenType = TK_NE; 204 return 2; 205 } 206 } 207 case '|': { 208 if( z[1]!='|' ){ 209 *tokenType = TK_BITOR; 210 return 1; 211 }else{ 212 *tokenType = TK_CONCAT; 213 return 2; 214 } 215 } 216 case ',': { 217 *tokenType = TK_COMMA; 218 return 1; 219 } 220 case '&': { 221 *tokenType = TK_BITAND; 222 return 1; 223 } 224 case '~': { 225 *tokenType = TK_BITNOT; 226 return 1; 227 } 228 case '`': 229 case '\'': 230 case '"': { 231 int delim = z[0]; 232 testcase( delim=='`' ); 233 testcase( delim=='\'' ); 234 testcase( delim=='"' ); 235 for(i=1; (c=z[i])!=0; i++){ 236 if( c==delim ){ 237 if( z[i+1]==delim ){ 238 i++; 239 }else{ 240 break; 241 } 242 } 243 } 244 if( c=='\'' ){ 245 *tokenType = TK_STRING; 246 return i+1; 247 }else if( c!=0 ){ 248 *tokenType = TK_ID; 249 return i+1; 250 }else{ 251 *tokenType = TK_ILLEGAL; 252 return i; 253 } 254 } 255 case '.': { 256 #ifndef SQLITE_OMIT_FLOATING_POINT 257 if( !sqlite3Isdigit(z[1]) ) 258 #endif 259 { 260 *tokenType = TK_DOT; 261 return 1; 262 } 263 /* If the next character is a digit, this is a floating point 264 ** number that begins with ".". Fall thru into the next case */ 265 } 266 case '0': case '1': case '2': case '3': case '4': 267 case '5': case '6': case '7': case '8': case '9': { 268 testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); 269 testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); 270 testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); 271 testcase( z[0]=='9' ); 272 *tokenType = TK_INTEGER; 273 for(i=0; sqlite3Isdigit(z[i]); i++){} 274 #ifndef SQLITE_OMIT_FLOATING_POINT 275 if( z[i]=='.' ){ 276 i++; 277 while( sqlite3Isdigit(z[i]) ){ i++; } 278 *tokenType = TK_FLOAT; 279 } 280 if( (z[i]=='e' || z[i]=='E') && 281 ( sqlite3Isdigit(z[i+1]) 282 || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) 283 ) 284 ){ 285 i += 2; 286 while( sqlite3Isdigit(z[i]) ){ i++; } 287 *tokenType = TK_FLOAT; 288 } 289 #endif 290 while( IdChar(z[i]) ){ 291 *tokenType = TK_ILLEGAL; 292 i++; 293 } 294 return i; 295 } 296 case '[': { 297 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 298 *tokenType = c==']' ? TK_ID : TK_ILLEGAL; 299 return i; 300 } 301 case '?': { 302 *tokenType = TK_VARIABLE; 303 for(i=1; sqlite3Isdigit(z[i]); i++){} 304 return i; 305 } 306 case '#': { 307 for(i=1; sqlite3Isdigit(z[i]); i++){} 308 if( i>1 ){ 309 /* Parameters of the form #NNN (where NNN is a number) are used 310 ** internally by sqlite3NestedParse. */ 311 *tokenType = TK_REGISTER; 312 return i; 313 } 314 /* Fall through into the next case if the '#' is not followed by 315 ** a digit. Try to match #AAAA where AAAA is a parameter name. */ 316 } 317 #ifndef SQLITE_OMIT_TCL_VARIABLE 318 case '$': 319 #endif 320 case '@': /* For compatibility with MS SQL Server */ 321 case ':': { 322 int n = 0; 323 testcase( z[0]=='$' ); testcase( z[0]=='@' ); testcase( z[0]==':' ); 324 *tokenType = TK_VARIABLE; 325 for(i=1; (c=z[i])!=0; i++){ 326 if( IdChar(c) ){ 327 n++; 328 #ifndef SQLITE_OMIT_TCL_VARIABLE 329 }else if( c=='(' && n>0 ){ 330 do{ 331 i++; 332 }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); 333 if( c==')' ){ 334 i++; 335 }else{ 336 *tokenType = TK_ILLEGAL; 337 } 338 break; 339 }else if( c==':' && z[i+1]==':' ){ 340 i++; 341 #endif 342 }else{ 343 break; 344 } 345 } 346 if( n==0 ) *tokenType = TK_ILLEGAL; 347 return i; 348 } 349 #ifndef SQLITE_OMIT_BLOB_LITERAL 350 case 'x': case 'X': { 351 testcase( z[0]=='x' ); testcase( z[0]=='X' ); 352 if( z[1]=='\'' ){ 353 *tokenType = TK_BLOB; 354 for(i=2; (c=z[i])!=0 && c!='\''; i++){ 355 if( !sqlite3Isxdigit(c) ){ 356 *tokenType = TK_ILLEGAL; 357 } 358 } 359 if( i%2 || !c ) *tokenType = TK_ILLEGAL; 360 if( c ) i++; 361 return i; 362 } 363 /* Otherwise fall through to the next case */ 364 } 365 #endif 366 default: { 367 if( !IdChar(*z) ){ 368 break; 369 } 370 for(i=1; IdChar(z[i]); i++){} 371 *tokenType = keywordCode((char*)z, i); 372 return i; 373 } 374 } 375 *tokenType = TK_ILLEGAL; 376 return 1; 377 } 378 379 /* 380 ** Run the parser on the given SQL string. The parser structure is 381 ** passed in. An SQLITE_ status code is returned. If an error occurs 382 ** then an and attempt is made to write an error message into 383 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that 384 ** error message. 385 */ 386 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 387 int nErr = 0; /* Number of errors encountered */ 388 int i; /* Loop counter */ 389 void *pEngine; /* The LEMON-generated LALR(1) parser */ 390 int tokenType; /* type of the next token */ 391 int lastTokenParsed = -1; /* type of the previous token */ 392 u8 enableLookaside; /* Saved value of db->lookaside.bEnabled */ 393 sqlite3 *db = pParse->db; /* The database connection */ 394 int mxSqlLen; /* Max length of an SQL string */ 395 396 397 mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; 398 if( db->activeVdbeCnt==0 ){ 399 db->u1.isInterrupted = 0; 400 } 401 pParse->rc = SQLITE_OK; 402 pParse->zTail = zSql; 403 i = 0; 404 assert( pzErrMsg!=0 ); 405 pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc); 406 if( pEngine==0 ){ 407 db->mallocFailed = 1; 408 return SQLITE_NOMEM; 409 } 410 assert( pParse->pNewTable==0 ); 411 assert( pParse->pNewTrigger==0 ); 412 assert( pParse->nVar==0 ); 413 assert( pParse->nVarExpr==0 ); 414 assert( pParse->nVarExprAlloc==0 ); 415 assert( pParse->apVarExpr==0 ); 416 enableLookaside = db->lookaside.bEnabled; 417 if( db->lookaside.pStart ) db->lookaside.bEnabled = 1; 418 while( !db->mallocFailed && zSql[i]!=0 ){ 419 assert( i>=0 ); 420 pParse->sLastToken.z = &zSql[i]; 421 pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); 422 i += pParse->sLastToken.n; 423 if( i>mxSqlLen ){ 424 pParse->rc = SQLITE_TOOBIG; 425 break; 426 } 427 switch( tokenType ){ 428 case TK_SPACE: { 429 if( db->u1.isInterrupted ){ 430 sqlite3ErrorMsg(pParse, "interrupt"); 431 pParse->rc = SQLITE_INTERRUPT; 432 goto abort_parse; 433 } 434 break; 435 } 436 case TK_ILLEGAL: { 437 sqlite3DbFree(db, *pzErrMsg); 438 *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", 439 &pParse->sLastToken); 440 nErr++; 441 goto abort_parse; 442 } 443 case TK_SEMI: { 444 pParse->zTail = &zSql[i]; 445 /* Fall thru into the default case */ 446 } 447 default: { 448 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); 449 lastTokenParsed = tokenType; 450 if( pParse->rc!=SQLITE_OK ){ 451 goto abort_parse; 452 } 453 break; 454 } 455 } 456 } 457 abort_parse: 458 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 459 if( lastTokenParsed!=TK_SEMI ){ 460 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 461 pParse->zTail = &zSql[i]; 462 } 463 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 464 } 465 #ifdef YYTRACKMAXSTACKDEPTH 466 sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, 467 sqlite3ParserStackPeak(pEngine) 468 ); 469 #endif /* YYDEBUG */ 470 sqlite3ParserFree(pEngine, sqlite3_free); 471 db->lookaside.bEnabled = enableLookaside; 472 if( db->mallocFailed ){ 473 pParse->rc = SQLITE_NOMEM; 474 } 475 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 476 sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); 477 } 478 assert( pzErrMsg!=0 ); 479 if( pParse->zErrMsg ){ 480 *pzErrMsg = pParse->zErrMsg; 481 pParse->zErrMsg = 0; 482 nErr++; 483 } 484 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ 485 sqlite3VdbeDelete(pParse->pVdbe); 486 pParse->pVdbe = 0; 487 } 488 #ifndef SQLITE_OMIT_SHARED_CACHE 489 if( pParse->nested==0 ){ 490 sqlite3DbFree(db, pParse->aTableLock); 491 pParse->aTableLock = 0; 492 pParse->nTableLock = 0; 493 } 494 #endif 495 #ifndef SQLITE_OMIT_VIRTUALTABLE 496 sqlite3DbFree(db, pParse->apVtabLock); 497 #endif 498 499 if( !IN_DECLARE_VTAB ){ 500 /* If the pParse->declareVtab flag is set, do not delete any table 501 ** structure built up in pParse->pNewTable. The calling code (see vtab.c) 502 ** will take responsibility for freeing the Table structure. 503 */ 504 sqlite3DeleteTable(pParse->pNewTable); 505 } 506 507 sqlite3DeleteTrigger(db, pParse->pNewTrigger); 508 sqlite3DbFree(db, pParse->apVarExpr); 509 sqlite3DbFree(db, pParse->aAlias); 510 while( pParse->pAinc ){ 511 AutoincInfo *p = pParse->pAinc; 512 pParse->pAinc = p->pNext; 513 sqlite3DbFree(db, p); 514 } 515 while( pParse->pZombieTab ){ 516 Table *p = pParse->pZombieTab; 517 pParse->pZombieTab = p->pNextZombie; 518 sqlite3DeleteTable(p); 519 } 520 if( nErr>0 && pParse->rc==SQLITE_OK ){ 521 pParse->rc = SQLITE_ERROR; 522 } 523 return nErr; 524 } 525