xref: /sqlite-3.40.0/src/tokenize.c (revision c023e03e)
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
** A tokenizer for SQL
13 **
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
17 **
18 ** $Id: tokenize.c,v 1.60 2003/05/04 18:30:59 drh Exp $
19 */
20 #include "sqliteInt.h"
21 #include "os.h"
22 #include <ctype.h>
23 #include <stdlib.h>
24 
/*
** All the keywords of the SQL language are stored in a hash table
** composed of instances of the following structure.
**
** zName points at a string literal and is never written through, so it
** is declared const.  The len and pNext fields are mutable because they
** are filled in at run time when the hash table is built.
*/
typedef struct Keyword Keyword;
struct Keyword {
  const char *zName;       /* The keyword name (read-only text) */
  int len;                 /* Number of characters in the keyword */
  int tokenType;           /* The token value for this keyword */
  Keyword *pNext;          /* Next keyword with the same hash */
};
36 
37 /*
38 ** These are the keywords
39 */
40 static Keyword aKeywordTable[] = {
41   { "ABORT",             0, TK_ABORT,            0 },
42   { "AFTER",             0, TK_AFTER,            0 },
43   { "ALL",               0, TK_ALL,              0 },
44   { "AND",               0, TK_AND,              0 },
45   { "AS",                0, TK_AS,               0 },
46   { "ASC",               0, TK_ASC,              0 },
47   { "ATTACH",            0, TK_ATTACH,           0 },
48   { "BEFORE",            0, TK_BEFORE,           0 },
49   { "BEGIN",             0, TK_BEGIN,            0 },
50   { "BETWEEN",           0, TK_BETWEEN,          0 },
51   { "BY",                0, TK_BY,               0 },
52   { "CASCADE",           0, TK_CASCADE,          0 },
53   { "CASE",              0, TK_CASE,             0 },
54   { "CHECK",             0, TK_CHECK,            0 },
55   { "CLUSTER",           0, TK_CLUSTER,          0 },
56   { "COLLATE",           0, TK_COLLATE,          0 },
57   { "COMMIT",            0, TK_COMMIT,           0 },
58   { "CONFLICT",          0, TK_CONFLICT,         0 },
59   { "CONSTRAINT",        0, TK_CONSTRAINT,       0 },
60   { "COPY",              0, TK_COPY,             0 },
61   { "CREATE",            0, TK_CREATE,           0 },
62   { "CROSS",             0, TK_JOIN_KW,          0 },
63   { "DATABASE",          0, TK_DATABASE,         0 },
64   { "DEFAULT",           0, TK_DEFAULT,          0 },
65   { "DEFERRED",          0, TK_DEFERRED,         0 },
66   { "DEFERRABLE",        0, TK_DEFERRABLE,       0 },
67   { "DELETE",            0, TK_DELETE,           0 },
68   { "DELIMITERS",        0, TK_DELIMITERS,       0 },
69   { "DESC",              0, TK_DESC,             0 },
70   { "DETACH",            0, TK_DETACH,           0 },
71   { "DISTINCT",          0, TK_DISTINCT,         0 },
72   { "DROP",              0, TK_DROP,             0 },
73   { "END",               0, TK_END,              0 },
74   { "EACH",              0, TK_EACH,             0 },
75   { "ELSE",              0, TK_ELSE,             0 },
76   { "EXCEPT",            0, TK_EXCEPT,           0 },
77   { "EXPLAIN",           0, TK_EXPLAIN,          0 },
78   { "FAIL",              0, TK_FAIL,             0 },
79   { "FOR",               0, TK_FOR,              0 },
80   { "FOREIGN",           0, TK_FOREIGN,          0 },
81   { "FROM",              0, TK_FROM,             0 },
82   { "FULL",              0, TK_JOIN_KW,          0 },
83   { "GLOB",              0, TK_GLOB,             0 },
84   { "GROUP",             0, TK_GROUP,            0 },
85   { "HAVING",            0, TK_HAVING,           0 },
86   { "IGNORE",            0, TK_IGNORE,           0 },
87   { "IMMEDIATE",         0, TK_IMMEDIATE,        0 },
88   { "IN",                0, TK_IN,               0 },
89   { "INDEX",             0, TK_INDEX,            0 },
90   { "INITIALLY",         0, TK_INITIALLY,        0 },
91   { "INNER",             0, TK_JOIN_KW,          0 },
92   { "INSERT",            0, TK_INSERT,           0 },
93   { "INSTEAD",           0, TK_INSTEAD,          0 },
94   { "INTERSECT",         0, TK_INTERSECT,        0 },
95   { "INTO",              0, TK_INTO,             0 },
96   { "IS",                0, TK_IS,               0 },
97   { "ISNULL",            0, TK_ISNULL,           0 },
98   { "JOIN",              0, TK_JOIN,             0 },
99   { "KEY",               0, TK_KEY,              0 },
100   { "LEFT",              0, TK_JOIN_KW,          0 },
101   { "LIKE",              0, TK_LIKE,             0 },
102   { "LIMIT",             0, TK_LIMIT,            0 },
103   { "MATCH",             0, TK_MATCH,            0 },
104   { "NATURAL",           0, TK_JOIN_KW,          0 },
105   { "NOT",               0, TK_NOT,              0 },
106   { "NOTNULL",           0, TK_NOTNULL,          0 },
107   { "NULL",              0, TK_NULL,             0 },
108   { "OF",                0, TK_OF,               0 },
109   { "OFFSET",            0, TK_OFFSET,           0 },
110   { "ON",                0, TK_ON,               0 },
111   { "OR",                0, TK_OR,               0 },
112   { "ORDER",             0, TK_ORDER,            0 },
113   { "OUTER",             0, TK_JOIN_KW,          0 },
114   { "PRAGMA",            0, TK_PRAGMA,           0 },
115   { "PRIMARY",           0, TK_PRIMARY,          0 },
116   { "RAISE",             0, TK_RAISE,            0 },
117   { "REFERENCES",        0, TK_REFERENCES,       0 },
118   { "REPLACE",           0, TK_REPLACE,          0 },
119   { "RESTRICT",          0, TK_RESTRICT,         0 },
120   { "RIGHT",             0, TK_JOIN_KW,          0 },
121   { "ROLLBACK",          0, TK_ROLLBACK,         0 },
122   { "ROW",               0, TK_ROW,              0 },
123   { "SELECT",            0, TK_SELECT,           0 },
124   { "SET",               0, TK_SET,              0 },
125   { "STATEMENT",         0, TK_STATEMENT,        0 },
126   { "TABLE",             0, TK_TABLE,            0 },
127   { "TEMP",              0, TK_TEMP,             0 },
128   { "TEMPORARY",         0, TK_TEMP,             0 },
129   { "THEN",              0, TK_THEN,             0 },
130   { "TRANSACTION",       0, TK_TRANSACTION,      0 },
131   { "TRIGGER",           0, TK_TRIGGER,          0 },
132   { "UNION",             0, TK_UNION,            0 },
133   { "UNIQUE",            0, TK_UNIQUE,           0 },
134   { "UPDATE",            0, TK_UPDATE,           0 },
135   { "USING",             0, TK_USING,            0 },
136   { "VACUUM",            0, TK_VACUUM,           0 },
137   { "VALUES",            0, TK_VALUES,           0 },
138   { "VIEW",              0, TK_VIEW,             0 },
139   { "WHEN",              0, TK_WHEN,             0 },
140   { "WHERE",             0, TK_WHERE,            0 },
141 };
142 
143 /*
144 ** This is the hash table
145 */
146 #define KEY_HASH_SIZE 71
147 static Keyword *apHashTable[KEY_HASH_SIZE];
148 
149 
150 /*
151 ** This function looks up an identifier to determine if it is a
152 ** keyword.  If it is a keyword, the token code of that keyword is
153 ** returned.  If the input is not a keyword, TK_ID is returned.
154 */
155 int sqliteKeywordCode(const char *z, int n){
156   int h;
157   Keyword *p;
158   if( aKeywordTable[0].len==0 ){
159     /* Initialize the keyword hash table */
160     sqliteOsEnterMutex();
161     if( aKeywordTable[0].len==0 ){
162       int i;
163       int n;
164       n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
165       for(i=0; i<n; i++){
166         aKeywordTable[i].len = strlen(aKeywordTable[i].zName);
167         h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len);
168         h %= KEY_HASH_SIZE;
169         aKeywordTable[i].pNext = apHashTable[h];
170         apHashTable[h] = &aKeywordTable[i];
171       }
172     }
173     sqliteOsLeaveMutex();
174   }
175   h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE;
176   for(p=apHashTable[h]; p; p=p->pNext){
177     if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){
178       return p->tokenType;
179     }
180   }
181   return TK_ID;
182 }
183 
184 
/*
** Lookup table indexed by byte value: isIdChar[X] is 1 when X may
** appear in an unquoted identifier and 0 otherwise.
**
** Identifier bytes are the ASCII digits, the ASCII letters, "_", and
** every byte with the high-order bit set.  The last rule lets any
** sequence of UTF-8 characters, or characters from an extended ISO8859
** character set, form an identifier.
*/
static const char isIdChar[] = {
  /* 0x00 - 0x0f : control characters */
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x10 - 0x1f : control characters */
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x20 - 0x2f : space and punctuation */
  0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  /* 0x30 - 0x3f : digits 0-9, then punctuation */
  1,1,1,1, 1,1,1,1, 1,1,0,0, 0,0,0,0,
  /* 0x40 - 0x4f : '@', then A-O */
  0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  /* 0x50 - 0x5f : P-Z, then [ \ ] ^, then '_' */
  1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,1,
  /* 0x60 - 0x6f : '`', then a-o */
  0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  /* 0x70 - 0x7f : p-z, then { | } ~ DEL */
  1,1,1,1, 1,1,1,1, 1,1,1,0, 0,0,0,0,
  /* 0x80 - 0xff : every byte with the high-order bit set */
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
};
214 
215 
216 /*
217 ** Return the length of the token that begins at z[0].  Return
218 ** -1 if the token is (or might be) incomplete.  Store the token
219 ** type in *tokenType before returning.
220 */
221 static int sqliteGetToken(const unsigned char *z, int *tokenType){
222   int i;
223   switch( *z ){
224     case ' ': case '\t': case '\n': case '\f': case '\r': {
225       for(i=1; isspace(z[i]); i++){}
226       *tokenType = TK_SPACE;
227       return i;
228     }
229     case '-': {
230       if( z[1]==0 ) return -1;
231       if( z[1]=='-' ){
232         for(i=2; z[i] && z[i]!='\n'; i++){}
233         *tokenType = TK_COMMENT;
234         return i;
235       }
236       *tokenType = TK_MINUS;
237       return 1;
238     }
239     case '(': {
240       if( z[1]=='+' && z[2]==')' ){
241         *tokenType = TK_ORACLE_OUTER_JOIN;
242         return 3;
243       }else{
244         *tokenType = TK_LP;
245         return 1;
246       }
247     }
248     case ')': {
249       *tokenType = TK_RP;
250       return 1;
251     }
252     case ';': {
253       *tokenType = TK_SEMI;
254       return 1;
255     }
256     case '+': {
257       *tokenType = TK_PLUS;
258       return 1;
259     }
260     case '*': {
261       *tokenType = TK_STAR;
262       return 1;
263     }
264     case '/': {
265       if( z[1]!='*' || z[2]==0 ){
266         *tokenType = TK_SLASH;
267         return 1;
268       }
269       for(i=3; z[i] && (z[i]!='/' || z[i-1]!='*'); i++){}
270       if( z[i] ) i++;
271       *tokenType = TK_COMMENT;
272       return i;
273     }
274     case '%': {
275       *tokenType = TK_REM;
276       return 1;
277     }
278     case '=': {
279       *tokenType = TK_EQ;
280       return 1 + (z[1]=='=');
281     }
282     case '<': {
283       if( z[1]=='=' ){
284         *tokenType = TK_LE;
285         return 2;
286       }else if( z[1]=='>' ){
287         *tokenType = TK_NE;
288         return 2;
289       }else if( z[1]=='<' ){
290         *tokenType = TK_LSHIFT;
291         return 2;
292       }else{
293         *tokenType = TK_LT;
294         return 1;
295       }
296     }
297     case '>': {
298       if( z[1]=='=' ){
299         *tokenType = TK_GE;
300         return 2;
301       }else if( z[1]=='>' ){
302         *tokenType = TK_RSHIFT;
303         return 2;
304       }else{
305         *tokenType = TK_GT;
306         return 1;
307       }
308     }
309     case '!': {
310       if( z[1]!='=' ){
311         *tokenType = TK_ILLEGAL;
312         return 2;
313       }else{
314         *tokenType = TK_NE;
315         return 2;
316       }
317     }
318     case '|': {
319       if( z[1]!='|' ){
320         *tokenType = TK_BITOR;
321         return 1;
322       }else{
323         *tokenType = TK_CONCAT;
324         return 2;
325       }
326     }
327     case ',': {
328       *tokenType = TK_COMMA;
329       return 1;
330     }
331     case '&': {
332       *tokenType = TK_BITAND;
333       return 1;
334     }
335     case '~': {
336       *tokenType = TK_BITNOT;
337       return 1;
338     }
339     case '\'': case '"': {
340       int delim = z[0];
341       for(i=1; z[i]; i++){
342         if( z[i]==delim ){
343           if( z[i+1]==delim ){
344             i++;
345           }else{
346             break;
347           }
348         }
349       }
350       if( z[i] ) i++;
351       *tokenType = TK_STRING;
352       return i;
353     }
354     case '.': {
355       *tokenType = TK_DOT;
356       return 1;
357     }
358     case '0': case '1': case '2': case '3': case '4':
359     case '5': case '6': case '7': case '8': case '9': {
360       *tokenType = TK_INTEGER;
361       for(i=1; isdigit(z[i]); i++){}
362       if( z[i]=='.' && isdigit(z[i+1]) ){
363         i += 2;
364         while( isdigit(z[i]) ){ i++; }
365         *tokenType = TK_FLOAT;
366       }
367       if( (z[i]=='e' || z[i]=='E') &&
368            ( isdigit(z[i+1])
369             || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
370            )
371       ){
372         i += 2;
373         while( isdigit(z[i]) ){ i++; }
374         *tokenType = TK_FLOAT;
375       }
376       return i;
377     }
378     case '[': {
379       for(i=1; z[i] && z[i-1]!=']'; i++){}
380       *tokenType = TK_ID;
381       return i;
382     }
383     default: {
384       if( !isIdChar[*z] ){
385         break;
386       }
387       for(i=1; isIdChar[z[i]]; i++){}
388       *tokenType = sqliteKeywordCode((char*)z, i);
389       return i;
390     }
391   }
392   *tokenType = TK_ILLEGAL;
393   return 1;
394 }
395 
396 /*
397 ** Run the parser on the given SQL string.  The parser structure is
398 ** passed in.  An SQLITE_ status code is returned.  If an error occurs
399 ** and pzErrMsg!=NULL then an error message might be written into
400 ** memory obtained from malloc() and *pzErrMsg made to point to that
401 ** error message.  Or maybe not.
402 */
403 int sqliteRunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
404   int nErr = 0;
405   int i;
406   void *pEngine;
407   int tokenType;
408   int lastTokenParsed = -1;
409   sqlite *db = pParse->db;
410   extern void *sqliteParserAlloc(void*(*)(int));
411   extern void sqliteParserFree(void*, void(*)(void*));
412   extern int sqliteParser(void*, int, Token, Parse*);
413 
414   db->flags &= ~SQLITE_Interrupt;
415   pParse->rc = SQLITE_OK;
416   i = 0;
417   pEngine = sqliteParserAlloc((void*(*)(int))malloc);
418   if( pEngine==0 ){
419     sqliteSetString(pzErrMsg, "out of memory", 0);
420     return 1;
421   }
422   pParse->sLastToken.dyn = 0;
423   pParse->zTail = zSql;
424   while( sqlite_malloc_failed==0 && zSql[i]!=0 ){
425 
426     assert( i>=0 );
427     pParse->sLastToken.z = &zSql[i];
428     assert( pParse->sLastToken.dyn==0 );
429     pParse->sLastToken.n = sqliteGetToken((unsigned char*)&zSql[i], &tokenType);
430     i += pParse->sLastToken.n;
431     switch( tokenType ){
432       case TK_SPACE:
433       case TK_COMMENT: {
434         if( (db->flags & SQLITE_Interrupt)!=0 ){
435           pParse->rc = SQLITE_INTERRUPT;
436           sqliteSetString(pzErrMsg, "interrupt", 0);
437           goto abort_parse;
438         }
439         break;
440       }
441       case TK_ILLEGAL: {
442         sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1,
443            pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0);
444         nErr++;
445         goto abort_parse;
446       }
447       case TK_SEMI: {
448         pParse->zTail = &zSql[i];
449         /* Fall thru into the default case */
450       }
451       default: {
452         sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse);
453         lastTokenParsed = tokenType;
454         if( pParse->rc!=SQLITE_OK ){
455           goto abort_parse;
456         }
457         break;
458       }
459     }
460   }
461 abort_parse:
462   if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
463     if( lastTokenParsed!=TK_SEMI ){
464       sqliteParser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
465       pParse->zTail = &zSql[i];
466     }
467     sqliteParser(pEngine, 0, pParse->sLastToken, pParse);
468   }
469   sqliteParserFree(pEngine, free);
470   if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
471     sqliteSetString(&pParse->zErrMsg, sqlite_error_string(pParse->rc), 0);
472   }
473   if( pParse->zErrMsg ){
474     if( pzErrMsg && *pzErrMsg==0 ){
475       *pzErrMsg = pParse->zErrMsg;
476     }else{
477       sqliteFree(pParse->zErrMsg);
478     }
479     pParse->zErrMsg = 0;
480     if( !nErr ) nErr++;
481   }
482   if( pParse->pVdbe && (pParse->useCallback || pParse->nErr>0) ){
483     sqliteVdbeDelete(pParse->pVdbe);
484     pParse->pVdbe = 0;
485   }
486   if( pParse->pNewTable ){
487     sqliteDeleteTable(pParse->db, pParse->pNewTable);
488     pParse->pNewTable = 0;
489   }
490   if( pParse->pNewTrigger ){
491     sqliteDeleteTrigger(pParse->pNewTrigger);
492     pParse->pNewTrigger = 0;
493   }
494   if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
495     pParse->rc = SQLITE_ERROR;
496   }
497   return nErr;
498 }
499 
/*
** Token classes used by the sqlite_complete() routine.  The values are
** the column indices of that routine's state-transition matrix, so
** they must stay in this order.  See the header comment on
** sqlite_complete() for additional information.
*/
enum {
  tkEXPLAIN = 0,
  tkCREATE  = 1,
  tkTEMP    = 2,
  tkTRIGGER = 3,
  tkEND     = 4,
  tkSEMI    = 5,
  tkWS      = 6,
  tkOTHER   = 7
};
512 
513 /*
514 ** Return TRUE if the given SQL string ends in a semicolon.
515 **
516 ** Special handling is require for CREATE TRIGGER statements.
517 ** Whenever the CREATE TRIGGER keywords are seen, the statement
518 ** must end with ";END;".
519 **
520 ** This implementation uses a state machine with 7 states:
521 **
522 **   (0) START     At the beginning or end of an SQL statement.  This routine
523 **                 returns 1 if it ends in the START state and 0 if it ends
524 **                 in any other state.
525 **
526 **   (1) EXPLAIN   The keyword EXPLAIN has been seen at the beginning of
527 **                 a statement.
528 **
529 **   (2) CREATE    The keyword CREATE has been seen at the beginning of a
530 **                 statement, possibly preceeded by EXPLAIN and/or followed by
531 **                 TEMP or TEMPORARY
532 **
533 **   (3) NORMAL    We are in the middle of statement which ends with a single
534 **                 semicolon.
535 **
536 **   (4) TRIGGER   We are in the middle of a trigger definition that must be
537 **                 ended by a semicolon, the keyword END, and another semicolon.
538 **
539 **   (5) SEMI      We've seen the first semicolon in the ";END;" that occurs at
540 **                 the end of a trigger definition.
541 **
542 **   (6) END       We've seen the ";END" of the ";END;" that occurs at the end
543 **                 of a trigger difinition.
544 **
545 ** Transitions between states above are determined by tokens extracted
546 ** from the input.  The following tokens are significant:
547 **
548 **   (0) tkEXPLAIN   The "explain" keyword.
549 **   (1) tkCREATE    The "create" keyword.
550 **   (2) tkTEMP      The "temp" or "temporary" keyword.
551 **   (3) tkTRIGGER   The "trigger" keyword.
552 **   (4) tkEND       The "end" keyword.
553 **   (5) tkSEMI      A semicolon.
554 **   (6) tkWS        Whitespace
555 **   (7) tkOTHER     Any other SQL token.
556 **
557 ** Whitespace never causes a state transition and is always ignored.
558 */
559 int sqlite_complete(const char *zSql){
560   u8 state = 0;   /* Current state, using numbers defined in header comment */
561   u8 token;       /* Value of the next token */
562 
563   /* The following matrix defines the transition from one state to another
564   ** according to what token is seen.  trans[state][token] returns the
565   ** next state.
566   */
567   static const u8 trans[7][8] = {
568                      /* Token:                                                */
569      /* State:       **  EXPLAIN  CREATE  TEMP  TRIGGER  END  SEMI  WS  OTHER */
570      /* 0   START: */ {       1,      2,    3,       3,   3,    0,  0,     3, },
571      /* 1 EXPLAIN: */ {       3,      2,    3,       3,   3,    0,  1,     3, },
572      /* 2  CREATE: */ {       3,      3,    2,       4,   3,    0,  2,     3, },
573      /* 3  NORMAL: */ {       3,      3,    3,       3,   3,    0,  3,     3, },
574      /* 4 TRIGGER: */ {       4,      4,    4,       4,   4,    5,  4,     4, },
575      /* 5    SEMI: */ {       4,      4,    4,       4,   6,    5,  5,     4, },
576      /* 6     END: */ {       4,      4,    4,       4,   4,    0,  6,     4, },
577   };
578 
579   while( *zSql ){
580     switch( *zSql ){
581       case ';': {  /* A semicolon */
582         token = tkSEMI;
583         break;
584       }
585       case ' ':
586       case '\r':
587       case '\t':
588       case '\n':
589       case '\f': {  /* White space is ignored */
590         token = tkWS;
591         break;
592       }
593       case '/': {   /* C-style comments */
594         if( zSql[1]!='*' ){
595           token = tkOTHER;
596           break;
597         }
598         zSql += 2;
599         while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; }
600         if( zSql[0]==0 ) return 0;
601         zSql++;
602         token = tkWS;
603         break;
604       }
605       case '-': {   /* SQL-style comments from "--" to end of line */
606         if( zSql[1]!='-' ){
607           token = tkOTHER;
608           break;
609         }
610         while( *zSql && *zSql!='\n' ){ zSql++; }
611         if( *zSql==0 ) return state==0;
612         token = tkWS;
613         break;
614       }
615       case '[': {   /* Microsoft-style identifiers in [...] */
616         zSql++;
617         while( *zSql && *zSql!=']' ){ zSql++; }
618         if( *zSql==0 ) return 0;
619         token = tkOTHER;
620         break;
621       }
622       case '"':     /* single- and double-quoted strings */
623       case '\'': {
624         int c = *zSql;
625         zSql++;
626         while( *zSql && *zSql!=c ){ zSql++; }
627         if( *zSql==0 ) return 0;
628         token = tkOTHER;
629         break;
630       }
631       default: {
632         if( isIdChar[(u8)*zSql] ){
633           /* Keywords and unquoted identifiers */
634           int nId;
635           for(nId=1; isIdChar[(u8)zSql[nId]]; nId++){}
636           switch( *zSql ){
637             case 'c': case 'C': {
638               if( nId==6 && sqliteStrNICmp(zSql, "create", 6)==0 ){
639                 token = tkCREATE;
640               }else{
641                 token = tkOTHER;
642               }
643               break;
644             }
645             case 't': case 'T': {
646               if( nId==7 && sqliteStrNICmp(zSql, "trigger", 7)==0 ){
647                 token = tkTRIGGER;
648               }else if( nId==4 && sqliteStrNICmp(zSql, "temp", 4)==0 ){
649                 token = tkTEMP;
650               }else if( nId==9 && sqliteStrNICmp(zSql, "temporary", 9)==0 ){
651                 token = tkTEMP;
652               }else{
653                 token = tkOTHER;
654               }
655               break;
656             }
657             case 'e':  case 'E': {
658               if( nId==3 && sqliteStrNICmp(zSql, "end", 3)==0 ){
659                 token = tkEND;
660               }else if( nId==7 && sqliteStrNICmp(zSql, "explain", 7)==0 ){
661                 token = tkEXPLAIN;
662               }else{
663                 token = tkOTHER;
664               }
665               break;
666             }
667             default: {
668               token = tkOTHER;
669               break;
670             }
671           }
672           zSql += nId-1;
673         }else{
674           /* Operators and special symbols */
675           token = tkOTHER;
676         }
677         break;
678       }
679     }
680     state = trans[state][token];
681     zSql++;
682   }
683   return state==0;
684 }
685