1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** An tokenizer for SQL 13 ** 14 ** This file contains C code that splits an SQL input string up into 15 ** individual tokens and sends those tokens one-by-one over to the 16 ** parser for analysis. 17 ** 18 ** $Id: tokenize.c,v 1.60 2003/05/04 18:30:59 drh Exp $ 19 */ 20 #include "sqliteInt.h" 21 #include "os.h" 22 #include <ctype.h> 23 #include <stdlib.h> 24 25 /* 26 ** All the keywords of the SQL language are stored as in a hash 27 ** table composed of instances of the following structure. 28 */ 29 typedef struct Keyword Keyword; 30 struct Keyword { 31 char *zName; /* The keyword name */ 32 int len; /* Number of characters in the keyword */ 33 int tokenType; /* The token value for this keyword */ 34 Keyword *pNext; /* Next keyword with the same hash */ 35 }; 36 37 /* 38 ** These are the keywords 39 */ 40 static Keyword aKeywordTable[] = { 41 { "ABORT", 0, TK_ABORT, 0 }, 42 { "AFTER", 0, TK_AFTER, 0 }, 43 { "ALL", 0, TK_ALL, 0 }, 44 { "AND", 0, TK_AND, 0 }, 45 { "AS", 0, TK_AS, 0 }, 46 { "ASC", 0, TK_ASC, 0 }, 47 { "ATTACH", 0, TK_ATTACH, 0 }, 48 { "BEFORE", 0, TK_BEFORE, 0 }, 49 { "BEGIN", 0, TK_BEGIN, 0 }, 50 { "BETWEEN", 0, TK_BETWEEN, 0 }, 51 { "BY", 0, TK_BY, 0 }, 52 { "CASCADE", 0, TK_CASCADE, 0 }, 53 { "CASE", 0, TK_CASE, 0 }, 54 { "CHECK", 0, TK_CHECK, 0 }, 55 { "CLUSTER", 0, TK_CLUSTER, 0 }, 56 { "COLLATE", 0, TK_COLLATE, 0 }, 57 { "COMMIT", 0, TK_COMMIT, 0 }, 58 { "CONFLICT", 0, TK_CONFLICT, 0 }, 59 { "CONSTRAINT", 0, TK_CONSTRAINT, 0 }, 60 { "COPY", 0, TK_COPY, 0 }, 61 { "CREATE", 0, TK_CREATE, 0 }, 62 { "CROSS", 0, TK_JOIN_KW, 0 }, 63 { "DATABASE", 0, TK_DATABASE, 0 
}, 64 { "DEFAULT", 0, TK_DEFAULT, 0 }, 65 { "DEFERRED", 0, TK_DEFERRED, 0 }, 66 { "DEFERRABLE", 0, TK_DEFERRABLE, 0 }, 67 { "DELETE", 0, TK_DELETE, 0 }, 68 { "DELIMITERS", 0, TK_DELIMITERS, 0 }, 69 { "DESC", 0, TK_DESC, 0 }, 70 { "DETACH", 0, TK_DETACH, 0 }, 71 { "DISTINCT", 0, TK_DISTINCT, 0 }, 72 { "DROP", 0, TK_DROP, 0 }, 73 { "END", 0, TK_END, 0 }, 74 { "EACH", 0, TK_EACH, 0 }, 75 { "ELSE", 0, TK_ELSE, 0 }, 76 { "EXCEPT", 0, TK_EXCEPT, 0 }, 77 { "EXPLAIN", 0, TK_EXPLAIN, 0 }, 78 { "FAIL", 0, TK_FAIL, 0 }, 79 { "FOR", 0, TK_FOR, 0 }, 80 { "FOREIGN", 0, TK_FOREIGN, 0 }, 81 { "FROM", 0, TK_FROM, 0 }, 82 { "FULL", 0, TK_JOIN_KW, 0 }, 83 { "GLOB", 0, TK_GLOB, 0 }, 84 { "GROUP", 0, TK_GROUP, 0 }, 85 { "HAVING", 0, TK_HAVING, 0 }, 86 { "IGNORE", 0, TK_IGNORE, 0 }, 87 { "IMMEDIATE", 0, TK_IMMEDIATE, 0 }, 88 { "IN", 0, TK_IN, 0 }, 89 { "INDEX", 0, TK_INDEX, 0 }, 90 { "INITIALLY", 0, TK_INITIALLY, 0 }, 91 { "INNER", 0, TK_JOIN_KW, 0 }, 92 { "INSERT", 0, TK_INSERT, 0 }, 93 { "INSTEAD", 0, TK_INSTEAD, 0 }, 94 { "INTERSECT", 0, TK_INTERSECT, 0 }, 95 { "INTO", 0, TK_INTO, 0 }, 96 { "IS", 0, TK_IS, 0 }, 97 { "ISNULL", 0, TK_ISNULL, 0 }, 98 { "JOIN", 0, TK_JOIN, 0 }, 99 { "KEY", 0, TK_KEY, 0 }, 100 { "LEFT", 0, TK_JOIN_KW, 0 }, 101 { "LIKE", 0, TK_LIKE, 0 }, 102 { "LIMIT", 0, TK_LIMIT, 0 }, 103 { "MATCH", 0, TK_MATCH, 0 }, 104 { "NATURAL", 0, TK_JOIN_KW, 0 }, 105 { "NOT", 0, TK_NOT, 0 }, 106 { "NOTNULL", 0, TK_NOTNULL, 0 }, 107 { "NULL", 0, TK_NULL, 0 }, 108 { "OF", 0, TK_OF, 0 }, 109 { "OFFSET", 0, TK_OFFSET, 0 }, 110 { "ON", 0, TK_ON, 0 }, 111 { "OR", 0, TK_OR, 0 }, 112 { "ORDER", 0, TK_ORDER, 0 }, 113 { "OUTER", 0, TK_JOIN_KW, 0 }, 114 { "PRAGMA", 0, TK_PRAGMA, 0 }, 115 { "PRIMARY", 0, TK_PRIMARY, 0 }, 116 { "RAISE", 0, TK_RAISE, 0 }, 117 { "REFERENCES", 0, TK_REFERENCES, 0 }, 118 { "REPLACE", 0, TK_REPLACE, 0 }, 119 { "RESTRICT", 0, TK_RESTRICT, 0 }, 120 { "RIGHT", 0, TK_JOIN_KW, 0 }, 121 { "ROLLBACK", 0, TK_ROLLBACK, 0 }, 122 { "ROW", 0, TK_ROW, 0 }, 123 { "SELECT", 0, 
TK_SELECT, 0 }, 124 { "SET", 0, TK_SET, 0 }, 125 { "STATEMENT", 0, TK_STATEMENT, 0 }, 126 { "TABLE", 0, TK_TABLE, 0 }, 127 { "TEMP", 0, TK_TEMP, 0 }, 128 { "TEMPORARY", 0, TK_TEMP, 0 }, 129 { "THEN", 0, TK_THEN, 0 }, 130 { "TRANSACTION", 0, TK_TRANSACTION, 0 }, 131 { "TRIGGER", 0, TK_TRIGGER, 0 }, 132 { "UNION", 0, TK_UNION, 0 }, 133 { "UNIQUE", 0, TK_UNIQUE, 0 }, 134 { "UPDATE", 0, TK_UPDATE, 0 }, 135 { "USING", 0, TK_USING, 0 }, 136 { "VACUUM", 0, TK_VACUUM, 0 }, 137 { "VALUES", 0, TK_VALUES, 0 }, 138 { "VIEW", 0, TK_VIEW, 0 }, 139 { "WHEN", 0, TK_WHEN, 0 }, 140 { "WHERE", 0, TK_WHERE, 0 }, 141 }; 142 143 /* 144 ** This is the hash table 145 */ 146 #define KEY_HASH_SIZE 71 147 static Keyword *apHashTable[KEY_HASH_SIZE]; 148 149 150 /* 151 ** This function looks up an identifier to determine if it is a 152 ** keyword. If it is a keyword, the token code of that keyword is 153 ** returned. If the input is not a keyword, TK_ID is returned. 154 */ 155 int sqliteKeywordCode(const char *z, int n){ 156 int h; 157 Keyword *p; 158 if( aKeywordTable[0].len==0 ){ 159 /* Initialize the keyword hash table */ 160 sqliteOsEnterMutex(); 161 if( aKeywordTable[0].len==0 ){ 162 int i; 163 int n; 164 n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); 165 for(i=0; i<n; i++){ 166 aKeywordTable[i].len = strlen(aKeywordTable[i].zName); 167 h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len); 168 h %= KEY_HASH_SIZE; 169 aKeywordTable[i].pNext = apHashTable[h]; 170 apHashTable[h] = &aKeywordTable[i]; 171 } 172 } 173 sqliteOsLeaveMutex(); 174 } 175 h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE; 176 for(p=apHashTable[h]; p; p=p->pNext){ 177 if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){ 178 return p->tokenType; 179 } 180 } 181 return TK_ID; 182 } 183 184 185 /* 186 ** If X is a character that can be used in an identifier then 187 ** isIdChar[X] will be 1. Otherwise isIdChar[X] will be 0. 
**
** In this implementation, an identifier can be a string of
** alphabetic characters, digits, and "_" plus any character
** with the high-order bit set.  The latter rule means that
** any sequence of UTF-8 characters or characters taken from
** an extended ISO8859 character set can form an identifier.
**
** The table has 16 rows of 16 entries, one entry for every possible
** byte value 0x00..0xFF.  It must therefore be indexed with an
** unsigned byte value; a negative (signed char) index would read
** outside the array.
*/
static const char isIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 8x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 9x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ax */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Bx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Cx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Dx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Fx */
};
/* Reading the rows above: 3x marks the digits '0'..'9'; 4x/5x mark
** 'A'..'Z' and, at 0x5F, the underscore; 6x/7x mark 'a'..'z'; rows
** 8x through Fx mark every byte that has the high-order bit set. */


/*
** Return the length of the token that begins at z[0].  Return
** -1 if the token is (or might be) incomplete.  Store the token
** type in *tokenType before returning.
220 */ 221 static int sqliteGetToken(const unsigned char *z, int *tokenType){ 222 int i; 223 switch( *z ){ 224 case ' ': case '\t': case '\n': case '\f': case '\r': { 225 for(i=1; isspace(z[i]); i++){} 226 *tokenType = TK_SPACE; 227 return i; 228 } 229 case '-': { 230 if( z[1]==0 ) return -1; 231 if( z[1]=='-' ){ 232 for(i=2; z[i] && z[i]!='\n'; i++){} 233 *tokenType = TK_COMMENT; 234 return i; 235 } 236 *tokenType = TK_MINUS; 237 return 1; 238 } 239 case '(': { 240 if( z[1]=='+' && z[2]==')' ){ 241 *tokenType = TK_ORACLE_OUTER_JOIN; 242 return 3; 243 }else{ 244 *tokenType = TK_LP; 245 return 1; 246 } 247 } 248 case ')': { 249 *tokenType = TK_RP; 250 return 1; 251 } 252 case ';': { 253 *tokenType = TK_SEMI; 254 return 1; 255 } 256 case '+': { 257 *tokenType = TK_PLUS; 258 return 1; 259 } 260 case '*': { 261 *tokenType = TK_STAR; 262 return 1; 263 } 264 case '/': { 265 if( z[1]!='*' || z[2]==0 ){ 266 *tokenType = TK_SLASH; 267 return 1; 268 } 269 for(i=3; z[i] && (z[i]!='/' || z[i-1]!='*'); i++){} 270 if( z[i] ) i++; 271 *tokenType = TK_COMMENT; 272 return i; 273 } 274 case '%': { 275 *tokenType = TK_REM; 276 return 1; 277 } 278 case '=': { 279 *tokenType = TK_EQ; 280 return 1 + (z[1]=='='); 281 } 282 case '<': { 283 if( z[1]=='=' ){ 284 *tokenType = TK_LE; 285 return 2; 286 }else if( z[1]=='>' ){ 287 *tokenType = TK_NE; 288 return 2; 289 }else if( z[1]=='<' ){ 290 *tokenType = TK_LSHIFT; 291 return 2; 292 }else{ 293 *tokenType = TK_LT; 294 return 1; 295 } 296 } 297 case '>': { 298 if( z[1]=='=' ){ 299 *tokenType = TK_GE; 300 return 2; 301 }else if( z[1]=='>' ){ 302 *tokenType = TK_RSHIFT; 303 return 2; 304 }else{ 305 *tokenType = TK_GT; 306 return 1; 307 } 308 } 309 case '!': { 310 if( z[1]!='=' ){ 311 *tokenType = TK_ILLEGAL; 312 return 2; 313 }else{ 314 *tokenType = TK_NE; 315 return 2; 316 } 317 } 318 case '|': { 319 if( z[1]!='|' ){ 320 *tokenType = TK_BITOR; 321 return 1; 322 }else{ 323 *tokenType = TK_CONCAT; 324 return 2; 325 } 326 } 327 case ',': { 328 
*tokenType = TK_COMMA; 329 return 1; 330 } 331 case '&': { 332 *tokenType = TK_BITAND; 333 return 1; 334 } 335 case '~': { 336 *tokenType = TK_BITNOT; 337 return 1; 338 } 339 case '\'': case '"': { 340 int delim = z[0]; 341 for(i=1; z[i]; i++){ 342 if( z[i]==delim ){ 343 if( z[i+1]==delim ){ 344 i++; 345 }else{ 346 break; 347 } 348 } 349 } 350 if( z[i] ) i++; 351 *tokenType = TK_STRING; 352 return i; 353 } 354 case '.': { 355 *tokenType = TK_DOT; 356 return 1; 357 } 358 case '0': case '1': case '2': case '3': case '4': 359 case '5': case '6': case '7': case '8': case '9': { 360 *tokenType = TK_INTEGER; 361 for(i=1; isdigit(z[i]); i++){} 362 if( z[i]=='.' && isdigit(z[i+1]) ){ 363 i += 2; 364 while( isdigit(z[i]) ){ i++; } 365 *tokenType = TK_FLOAT; 366 } 367 if( (z[i]=='e' || z[i]=='E') && 368 ( isdigit(z[i+1]) 369 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) 370 ) 371 ){ 372 i += 2; 373 while( isdigit(z[i]) ){ i++; } 374 *tokenType = TK_FLOAT; 375 } 376 return i; 377 } 378 case '[': { 379 for(i=1; z[i] && z[i-1]!=']'; i++){} 380 *tokenType = TK_ID; 381 return i; 382 } 383 default: { 384 if( !isIdChar[*z] ){ 385 break; 386 } 387 for(i=1; isIdChar[z[i]]; i++){} 388 *tokenType = sqliteKeywordCode((char*)z, i); 389 return i; 390 } 391 } 392 *tokenType = TK_ILLEGAL; 393 return 1; 394 } 395 396 /* 397 ** Run the parser on the given SQL string. The parser structure is 398 ** passed in. An SQLITE_ status code is returned. If an error occurs 399 ** and pzErrMsg!=NULL then an error message might be written into 400 ** memory obtained from malloc() and *pzErrMsg made to point to that 401 ** error message. Or maybe not. 
402 */ 403 int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 404 int nErr = 0; 405 int i; 406 void *pEngine; 407 int tokenType; 408 int lastTokenParsed = -1; 409 sqlite *db = pParse->db; 410 extern void *sqliteParserAlloc(void*(*)(int)); 411 extern void sqliteParserFree(void*, void(*)(void*)); 412 extern int sqliteParser(void*, int, Token, Parse*); 413 414 db->flags &= ~SQLITE_Interrupt; 415 pParse->rc = SQLITE_OK; 416 i = 0; 417 pEngine = sqliteParserAlloc((void*(*)(int))malloc); 418 if( pEngine==0 ){ 419 sqliteSetString(pzErrMsg, "out of memory", 0); 420 return 1; 421 } 422 pParse->sLastToken.dyn = 0; 423 pParse->zTail = zSql; 424 while( sqlite_malloc_failed==0 && zSql[i]!=0 ){ 425 426 assert( i>=0 ); 427 pParse->sLastToken.z = &zSql[i]; 428 assert( pParse->sLastToken.dyn==0 ); 429 pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType); 430 i += pParse->sLastToken.n; 431 switch( tokenType ){ 432 case TK_SPACE: 433 case TK_COMMENT: { 434 if( (db->flags & SQLITE_Interrupt)!=0 ){ 435 pParse->rc = SQLITE_INTERRUPT; 436 sqliteSetString(pzErrMsg, "interrupt", 0); 437 goto abort_parse; 438 } 439 break; 440 } 441 case TK_ILLEGAL: { 442 sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1, 443 pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0); 444 nErr++; 445 goto abort_parse; 446 } 447 case TK_SEMI: { 448 pParse->zTail = &zSql[i]; 449 /* Fall thru into the default case */ 450 } 451 default: { 452 sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse); 453 lastTokenParsed = tokenType; 454 if( pParse->rc!=SQLITE_OK ){ 455 goto abort_parse; 456 } 457 break; 458 } 459 } 460 } 461 abort_parse: 462 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 463 if( lastTokenParsed!=TK_SEMI ){ 464 sqliteParser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 465 pParse->zTail = &zSql[i]; 466 } 467 sqliteParser(pEngine, 0, pParse->sLastToken, pParse); 468 } 469 sqliteParserFree(pEngine, free); 470 if( pParse->rc!=SQLITE_OK && 
pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 471 sqliteSetString(&pParse->zErrMsg, sqlite_error_string(pParse->rc), 0); 472 } 473 if( pParse->zErrMsg ){ 474 if( pzErrMsg && *pzErrMsg==0 ){ 475 *pzErrMsg = pParse->zErrMsg; 476 }else{ 477 sqliteFree(pParse->zErrMsg); 478 } 479 pParse->zErrMsg = 0; 480 if( !nErr ) nErr++; 481 } 482 if( pParse->pVdbe && (pParse->useCallback || pParse->nErr>0) ){ 483 sqliteVdbeDelete(pParse->pVdbe); 484 pParse->pVdbe = 0; 485 } 486 if( pParse->pNewTable ){ 487 sqliteDeleteTable(pParse->db, pParse->pNewTable); 488 pParse->pNewTable = 0; 489 } 490 if( pParse->pNewTrigger ){ 491 sqliteDeleteTrigger(pParse->pNewTrigger); 492 pParse->pNewTrigger = 0; 493 } 494 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ 495 pParse->rc = SQLITE_ERROR; 496 } 497 return nErr; 498 } 499 500 /* 501 ** Token types used by the sqlite_complete() routine. See the header 502 ** comments on that procedure for additional information. 503 */ 504 #define tkEXPLAIN 0 505 #define tkCREATE 1 506 #define tkTEMP 2 507 #define tkTRIGGER 3 508 #define tkEND 4 509 #define tkSEMI 5 510 #define tkWS 6 511 #define tkOTHER 7 512 513 /* 514 ** Return TRUE if the given SQL string ends in a semicolon. 515 ** 516 ** Special handling is require for CREATE TRIGGER statements. 517 ** Whenever the CREATE TRIGGER keywords are seen, the statement 518 ** must end with ";END;". 519 ** 520 ** This implementation uses a state machine with 7 states: 521 ** 522 ** (0) START At the beginning or end of an SQL statement. This routine 523 ** returns 1 if it ends in the START state and 0 if it ends 524 ** in any other state. 525 ** 526 ** (1) EXPLAIN The keyword EXPLAIN has been seen at the beginning of 527 ** a statement. 
528 ** 529 ** (2) CREATE The keyword CREATE has been seen at the beginning of a 530 ** statement, possibly preceeded by EXPLAIN and/or followed by 531 ** TEMP or TEMPORARY 532 ** 533 ** (3) NORMAL We are in the middle of statement which ends with a single 534 ** semicolon. 535 ** 536 ** (4) TRIGGER We are in the middle of a trigger definition that must be 537 ** ended by a semicolon, the keyword END, and another semicolon. 538 ** 539 ** (5) SEMI We've seen the first semicolon in the ";END;" that occurs at 540 ** the end of a trigger definition. 541 ** 542 ** (6) END We've seen the ";END" of the ";END;" that occurs at the end 543 ** of a trigger difinition. 544 ** 545 ** Transitions between states above are determined by tokens extracted 546 ** from the input. The following tokens are significant: 547 ** 548 ** (0) tkEXPLAIN The "explain" keyword. 549 ** (1) tkCREATE The "create" keyword. 550 ** (2) tkTEMP The "temp" or "temporary" keyword. 551 ** (3) tkTRIGGER The "trigger" keyword. 552 ** (4) tkEND The "end" keyword. 553 ** (5) tkSEMI A semicolon. 554 ** (6) tkWS Whitespace 555 ** (7) tkOTHER Any other SQL token. 556 ** 557 ** Whitespace never causes a state transition and is always ignored. 558 */ 559 int sqlite_complete(const char *zSql){ 560 u8 state = 0; /* Current state, using numbers defined in header comment */ 561 u8 token; /* Value of the next token */ 562 563 /* The following matrix defines the transition from one state to another 564 ** according to what token is seen. trans[state][token] returns the 565 ** next state. 
566 */ 567 static const u8 trans[7][8] = { 568 /* Token: */ 569 /* State: ** EXPLAIN CREATE TEMP TRIGGER END SEMI WS OTHER */ 570 /* 0 START: */ { 1, 2, 3, 3, 3, 0, 0, 3, }, 571 /* 1 EXPLAIN: */ { 3, 2, 3, 3, 3, 0, 1, 3, }, 572 /* 2 CREATE: */ { 3, 3, 2, 4, 3, 0, 2, 3, }, 573 /* 3 NORMAL: */ { 3, 3, 3, 3, 3, 0, 3, 3, }, 574 /* 4 TRIGGER: */ { 4, 4, 4, 4, 4, 5, 4, 4, }, 575 /* 5 SEMI: */ { 4, 4, 4, 4, 6, 5, 5, 4, }, 576 /* 6 END: */ { 4, 4, 4, 4, 4, 0, 6, 4, }, 577 }; 578 579 while( *zSql ){ 580 switch( *zSql ){ 581 case ';': { /* A semicolon */ 582 token = tkSEMI; 583 break; 584 } 585 case ' ': 586 case '\r': 587 case '\t': 588 case '\n': 589 case '\f': { /* White space is ignored */ 590 token = tkWS; 591 break; 592 } 593 case '/': { /* C-style comments */ 594 if( zSql[1]!='*' ){ 595 token = tkOTHER; 596 break; 597 } 598 zSql += 2; 599 while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } 600 if( zSql[0]==0 ) return 0; 601 zSql++; 602 token = tkWS; 603 break; 604 } 605 case '-': { /* SQL-style comments from "--" to end of line */ 606 if( zSql[1]!='-' ){ 607 token = tkOTHER; 608 break; 609 } 610 while( *zSql && *zSql!='\n' ){ zSql++; } 611 if( *zSql==0 ) return state==0; 612 token = tkWS; 613 break; 614 } 615 case '[': { /* Microsoft-style identifiers in [...] 
*/ 616 zSql++; 617 while( *zSql && *zSql!=']' ){ zSql++; } 618 if( *zSql==0 ) return 0; 619 token = tkOTHER; 620 break; 621 } 622 case '"': /* single- and double-quoted strings */ 623 case '\'': { 624 int c = *zSql; 625 zSql++; 626 while( *zSql && *zSql!=c ){ zSql++; } 627 if( *zSql==0 ) return 0; 628 token = tkOTHER; 629 break; 630 } 631 default: { 632 if( isIdChar[(u8)*zSql] ){ 633 /* Keywords and unquoted identifiers */ 634 int nId; 635 for(nId=1; isIdChar[(u8)zSql[nId]]; nId++){} 636 switch( *zSql ){ 637 case 'c': case 'C': { 638 if( nId==6 && sqliteStrNICmp(zSql, "create", 6)==0 ){ 639 token = tkCREATE; 640 }else{ 641 token = tkOTHER; 642 } 643 break; 644 } 645 case 't': case 'T': { 646 if( nId==7 && sqliteStrNICmp(zSql, "trigger", 7)==0 ){ 647 token = tkTRIGGER; 648 }else if( nId==4 && sqliteStrNICmp(zSql, "temp", 4)==0 ){ 649 token = tkTEMP; 650 }else if( nId==9 && sqliteStrNICmp(zSql, "temporary", 9)==0 ){ 651 token = tkTEMP; 652 }else{ 653 token = tkOTHER; 654 } 655 break; 656 } 657 case 'e': case 'E': { 658 if( nId==3 && sqliteStrNICmp(zSql, "end", 3)==0 ){ 659 token = tkEND; 660 }else if( nId==7 && sqliteStrNICmp(zSql, "explain", 7)==0 ){ 661 token = tkEXPLAIN; 662 }else{ 663 token = tkOTHER; 664 } 665 break; 666 } 667 default: { 668 token = tkOTHER; 669 break; 670 } 671 } 672 zSql += nId-1; 673 }else{ 674 /* Operators and special symbols */ 675 token = tkOTHER; 676 } 677 break; 678 } 679 } 680 state = trans[state][token]; 681 zSql++; 682 } 683 return state==0; 684 } 685