xref: /sqlite-3.40.0/src/tokenize.c (revision a3f06598)
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 ** A tokenizer for SQL
13 **
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
17 **
18 ** $Id: tokenize.c,v 1.155 2009/03/31 03:41:57 shane Exp $
19 */
20 #include "sqliteInt.h"
21 #include <stdlib.h>
22 
/*
** charMap(X) folds the character X to the lower-case ASCII code of the
** same letter, so that identifiers can be hashed and compared without
** regard to case.
**
** On ASCII builds the lookup goes through sqlite3UpperToLower[], which
** is defined outside this file.  On EBCDIC builds the lookup goes through
** ebcdicToAscii[] below, which translates both the upper-case and the
** lower-case EBCDIC letters to ASCII 'a'..'z' (97..122), translates EBCDIC
** 0x6D to ASCII '_' (95), and maps every other code to 0.
**
** The argument is parenthesized before it is cast to unsigned char so
** that a compound expression (for example a ?: expression, or a value of
** a signed char type) is narrowed as a whole before being used as the
** array index.  The index is therefore always in the range 0..255.
*/
#ifdef SQLITE_ASCII
# define charMap(X) (sqlite3UpperToLower[(unsigned char)(X)])
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) (ebcdicToAscii[(unsigned char)(X)])
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
55 
56 /*
57 ** The sqlite3KeywordCode function looks up an identifier to determine if
58 ** it is a keyword.  If it is a keyword, the token code of that keyword is
59 ** returned.  If the input is not a keyword, TK_ID is returned.
60 **
61 ** The implementation of this routine was generated by a program,
62 ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
63 ** The output of the mkkeywordhash.c program is written into a file
64 ** named keywordhash.h and then included into this source file by
65 ** the #include below.
66 */
67 #include "keywordhash.h"
68 
69 
70 /*
71 ** If X is a character that can be used in an identifier then
72 ** IdChar(X) will be true.  Otherwise it is false.
73 **
74 ** For ASCII, any character with the high-order bit set is
75 ** allowed in an identifier.  For 7-bit characters, X must be at least
76 ** 0x20 and sqlite3IsAsciiIdChar[X-0x20] must be 1.
77 **
78 ** For EBCDIC, the rules are more complex but have the same
79 ** end result.
80 **
81 ** Ticket #1066.  The SQL standard does not allow '$' in the
82 ** middle of identifiers.  But many SQL implementations do.
83 ** SQLite will allow '$' in identifiers for compatibility.
84 ** But the feature is undocumented.
85 */
#ifdef SQLITE_ASCII
/*
** Identifier-character table for the 7-bit range 0x20 through 0x7f.
** Entry [ch-0x20] is 1 when the ASCII character ch may appear in an
** identifier and 0 when it may not.  There is one row of sixteen
** entries for each value of the high nibble.
*/
const char sqlite3IsAsciiIdChar[] = {
    /* 0x20-0x2f: only '$' (0x24) is accepted */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30-0x3f: the digits '0'..'9' */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    /* 0x40-0x4f: 'A'..'O' ('@' itself is rejected) */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50-0x5f: 'P'..'Z' and '_' (0x5f) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60-0x6f: 'a'..'o' (the backtick is rejected) */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70-0x7f: 'p'..'z' */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};

/*
** IdChar(C) is true if C may appear in an identifier.
**
** NOTE: the macro stores its argument into a variable named "c", so an
** assignable integer variable of that name must be in scope wherever the
** macro is expanded.  Any value with bit 0x80 set is accepted outright;
** values of 0x1f or less are rejected; everything in between is looked
** up in sqlite3IsAsciiIdChar[].
*/
#define IdChar(C) \
  ( ((c=(C))&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20]) )
#endif
#ifdef SQLITE_EBCDIC
/*
** Identifier-character table for EBCDIC codes 0x40 through 0xff.
** Entry [ch-0x40] is 1 when the EBCDIC code ch may appear in an
** identifier and 0 when it may not.  There is one row of sixteen
** entries for each value of the high nibble, 0x4 through 0xF.
*/
const char sqlite3IsEbcdicIdChar[] = {
    /* 0x40-0x4f */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    /* 0x50-0x5f */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
    /* 0x60-0x6f */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,
    /* 0x70-0x7f */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80-0x8f */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,
    /* 0x90-0x9f */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
    /* 0xa0-0xaf */
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
    /* 0xb0-0xbf */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xc0-0xcf */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
    /* 0xd0-0xdf */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
    /* 0xe0-0xef */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
    /* 0xf0-0xff */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,
};

/*
** IdChar(C) is true if C may appear in an identifier.
**
** NOTE: the macro stores its argument into a variable named "c", so an
** assignable integer variable of that name must be in scope wherever the
** macro is expanded.  Codes below 0x42 are rejected without consulting
** the table; all other codes are looked up in sqlite3IsEbcdicIdChar[].
*/
#define IdChar(C) \
  ( (c=(C))>=0x42 && sqlite3IsEbcdicIdChar[c-0x40] )
#endif
116 
117 
118 /*
119 ** Return the length of the token that begins at z[0].
120 ** Store the token type in *tokenType before returning.
121 */
122 int sqlite3GetToken(const unsigned char *z, int *tokenType){
123   int i, c;
124   switch( *z ){
125     case ' ': case '\t': case '\n': case '\f': case '\r': {
126       for(i=1; sqlite3Isspace(z[i]); i++){}
127       *tokenType = TK_SPACE;
128       return i;
129     }
130     case '-': {
131       if( z[1]=='-' ){
132         for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
133         *tokenType = TK_SPACE;
134         return i;
135       }
136       *tokenType = TK_MINUS;
137       return 1;
138     }
139     case '(': {
140       *tokenType = TK_LP;
141       return 1;
142     }
143     case ')': {
144       *tokenType = TK_RP;
145       return 1;
146     }
147     case ';': {
148       *tokenType = TK_SEMI;
149       return 1;
150     }
151     case '+': {
152       *tokenType = TK_PLUS;
153       return 1;
154     }
155     case '*': {
156       *tokenType = TK_STAR;
157       return 1;
158     }
159     case '/': {
160       if( z[1]!='*' || z[2]==0 ){
161         *tokenType = TK_SLASH;
162         return 1;
163       }
164       for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
165       if( c ) i++;
166       *tokenType = TK_SPACE;
167       return i;
168     }
169     case '%': {
170       *tokenType = TK_REM;
171       return 1;
172     }
173     case '=': {
174       *tokenType = TK_EQ;
175       return 1 + (z[1]=='=');
176     }
177     case '<': {
178       if( (c=z[1])=='=' ){
179         *tokenType = TK_LE;
180         return 2;
181       }else if( c=='>' ){
182         *tokenType = TK_NE;
183         return 2;
184       }else if( c=='<' ){
185         *tokenType = TK_LSHIFT;
186         return 2;
187       }else{
188         *tokenType = TK_LT;
189         return 1;
190       }
191     }
192     case '>': {
193       if( (c=z[1])=='=' ){
194         *tokenType = TK_GE;
195         return 2;
196       }else if( c=='>' ){
197         *tokenType = TK_RSHIFT;
198         return 2;
199       }else{
200         *tokenType = TK_GT;
201         return 1;
202       }
203     }
204     case '!': {
205       if( z[1]!='=' ){
206         *tokenType = TK_ILLEGAL;
207         return 2;
208       }else{
209         *tokenType = TK_NE;
210         return 2;
211       }
212     }
213     case '|': {
214       if( z[1]!='|' ){
215         *tokenType = TK_BITOR;
216         return 1;
217       }else{
218         *tokenType = TK_CONCAT;
219         return 2;
220       }
221     }
222     case ',': {
223       *tokenType = TK_COMMA;
224       return 1;
225     }
226     case '&': {
227       *tokenType = TK_BITAND;
228       return 1;
229     }
230     case '~': {
231       *tokenType = TK_BITNOT;
232       return 1;
233     }
234     case '`':
235     case '\'':
236     case '"': {
237       int delim = z[0];
238       for(i=1; (c=z[i])!=0; i++){
239         if( c==delim ){
240           if( z[i+1]==delim ){
241             i++;
242           }else{
243             break;
244           }
245         }
246       }
247       if( c=='\'' ){
248         *tokenType = TK_STRING;
249         return i+1;
250       }else if( c!=0 ){
251         *tokenType = TK_ID;
252         return i+1;
253       }else{
254         *tokenType = TK_ILLEGAL;
255         return i;
256       }
257     }
258     case '.': {
259 #ifndef SQLITE_OMIT_FLOATING_POINT
260       if( !sqlite3Isdigit(z[1]) )
261 #endif
262       {
263         *tokenType = TK_DOT;
264         return 1;
265       }
266       /* If the next character is a digit, this is a floating point
267       ** number that begins with ".".  Fall thru into the next case */
268     }
269     case '0': case '1': case '2': case '3': case '4':
270     case '5': case '6': case '7': case '8': case '9': {
271       *tokenType = TK_INTEGER;
272       for(i=0; sqlite3Isdigit(z[i]); i++){}
273 #ifndef SQLITE_OMIT_FLOATING_POINT
274       if( z[i]=='.' ){
275         i++;
276         while( sqlite3Isdigit(z[i]) ){ i++; }
277         *tokenType = TK_FLOAT;
278       }
279       if( (z[i]=='e' || z[i]=='E') &&
280            ( sqlite3Isdigit(z[i+1])
281             || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2]))
282            )
283       ){
284         i += 2;
285         while( sqlite3Isdigit(z[i]) ){ i++; }
286         *tokenType = TK_FLOAT;
287       }
288 #endif
289       while( IdChar(z[i]) ){
290         *tokenType = TK_ILLEGAL;
291         i++;
292       }
293       return i;
294     }
295     case '[': {
296       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
297       *tokenType = c==']' ? TK_ID : TK_ILLEGAL;
298       return i;
299     }
300     case '?': {
301       *tokenType = TK_VARIABLE;
302       for(i=1; sqlite3Isdigit(z[i]); i++){}
303       return i;
304     }
305     case '#': {
306       for(i=1; sqlite3Isdigit(z[i]); i++){}
307       if( i>1 ){
308         /* Parameters of the form #NNN (where NNN is a number) are used
309         ** internally by sqlite3NestedParse.  */
310         *tokenType = TK_REGISTER;
311         return i;
312       }
313       /* Fall through into the next case if the '#' is not followed by
314       ** a digit. Try to match #AAAA where AAAA is a parameter name. */
315     }
316 #ifndef SQLITE_OMIT_TCL_VARIABLE
317     case '$':
318 #endif
319     case '@':  /* For compatibility with MS SQL Server */
320     case ':': {
321       int n = 0;
322       *tokenType = TK_VARIABLE;
323       for(i=1; (c=z[i])!=0; i++){
324         if( IdChar(c) ){
325           n++;
326 #ifndef SQLITE_OMIT_TCL_VARIABLE
327         }else if( c=='(' && n>0 ){
328           do{
329             i++;
330           }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' );
331           if( c==')' ){
332             i++;
333           }else{
334             *tokenType = TK_ILLEGAL;
335           }
336           break;
337         }else if( c==':' && z[i+1]==':' ){
338           i++;
339 #endif
340         }else{
341           break;
342         }
343       }
344       if( n==0 ) *tokenType = TK_ILLEGAL;
345       return i;
346     }
347 #ifndef SQLITE_OMIT_BLOB_LITERAL
348     case 'x': case 'X': {
349       if( z[1]=='\'' ){
350         *tokenType = TK_BLOB;
351         for(i=2; (c=z[i])!=0 && c!='\''; i++){
352           if( !sqlite3Isxdigit(c) ){
353             *tokenType = TK_ILLEGAL;
354           }
355         }
356         if( i%2 || !c ) *tokenType = TK_ILLEGAL;
357         if( c ) i++;
358         return i;
359       }
360       /* Otherwise fall through to the next case */
361     }
362 #endif
363     default: {
364       if( !IdChar(*z) ){
365         break;
366       }
367       for(i=1; IdChar(z[i]); i++){}
368       *tokenType = keywordCode((char*)z, i);
369       return i;
370     }
371   }
372   *tokenType = TK_ILLEGAL;
373   return 1;
374 }
375 
376 /*
377 ** Run the parser on the given SQL string.  The parser structure is
378 ** passed in.  An SQLITE_ status code is returned.  If an error occurs
379 ** then an and attempt is made to write an error message into
380 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that
381 ** error message.
382 */
383 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
384   int nErr = 0;                   /* Number of errors encountered */
385   int i;                          /* Loop counter */
386   void *pEngine;                  /* The LEMON-generated LALR(1) parser */
387   int tokenType;                  /* type of the next token */
388   int lastTokenParsed = -1;       /* type of the previous token */
389   u8 enableLookaside;             /* Saved value of db->lookaside.bEnabled */
390   sqlite3 *db = pParse->db;       /* The database connection */
391   int mxSqlLen;                   /* Max length of an SQL string */
392 
393 
394   mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH];
395   if( db->activeVdbeCnt==0 ){
396     db->u1.isInterrupted = 0;
397   }
398   pParse->rc = SQLITE_OK;
399   pParse->zTail = pParse->zSql = zSql;
400   i = 0;
401   assert( pzErrMsg!=0 );
402   pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc);
403   if( pEngine==0 ){
404     db->mallocFailed = 1;
405     return SQLITE_NOMEM;
406   }
407   assert( pParse->sLastToken.dyn==0 );
408   assert( pParse->pNewTable==0 );
409   assert( pParse->pNewTrigger==0 );
410   assert( pParse->nVar==0 );
411   assert( pParse->nVarExpr==0 );
412   assert( pParse->nVarExprAlloc==0 );
413   assert( pParse->apVarExpr==0 );
414   enableLookaside = db->lookaside.bEnabled;
415   if( db->lookaside.pStart ) db->lookaside.bEnabled = 1;
416   while( !db->mallocFailed && zSql[i]!=0 ){
417     assert( i>=0 );
418     pParse->sLastToken.z = (u8*)&zSql[i];
419     assert( pParse->sLastToken.dyn==0 );
420     pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType);
421     i += pParse->sLastToken.n;
422     if( i>mxSqlLen ){
423       pParse->rc = SQLITE_TOOBIG;
424       break;
425     }
426     switch( tokenType ){
427       case TK_SPACE: {
428         if( db->u1.isInterrupted ){
429           pParse->rc = SQLITE_INTERRUPT;
430           sqlite3SetString(pzErrMsg, db, "interrupt");
431           goto abort_parse;
432         }
433         break;
434       }
435       case TK_ILLEGAL: {
436         sqlite3DbFree(db, *pzErrMsg);
437         *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"",
438                         &pParse->sLastToken);
439         nErr++;
440         goto abort_parse;
441       }
442       case TK_SEMI: {
443         pParse->zTail = &zSql[i];
444         /* Fall thru into the default case */
445       }
446       default: {
447         sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
448         lastTokenParsed = tokenType;
449         if( pParse->rc!=SQLITE_OK ){
450           goto abort_parse;
451         }
452         break;
453       }
454     }
455   }
456 abort_parse:
457   if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
458     if( lastTokenParsed!=TK_SEMI ){
459       sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
460       pParse->zTail = &zSql[i];
461     }
462     sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
463   }
464 #ifdef YYTRACKMAXSTACKDEPTH
465   sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK,
466       sqlite3ParserStackPeak(pEngine)
467   );
468 #endif /* YYDEBUG */
469   sqlite3ParserFree(pEngine, sqlite3_free);
470   db->lookaside.bEnabled = enableLookaside;
471   if( db->mallocFailed ){
472     pParse->rc = SQLITE_NOMEM;
473   }
474   if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
475     sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc));
476   }
477   if( pParse->zErrMsg ){
478     if( *pzErrMsg==0 ){
479       *pzErrMsg = pParse->zErrMsg;
480     }else{
481       sqlite3DbFree(db, pParse->zErrMsg);
482     }
483     pParse->zErrMsg = 0;
484     nErr++;
485   }
486   if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
487     sqlite3VdbeDelete(pParse->pVdbe);
488     pParse->pVdbe = 0;
489   }
490 #ifndef SQLITE_OMIT_SHARED_CACHE
491   if( pParse->nested==0 ){
492     sqlite3DbFree(db, pParse->aTableLock);
493     pParse->aTableLock = 0;
494     pParse->nTableLock = 0;
495   }
496 #endif
497 #ifndef SQLITE_OMIT_VIRTUALTABLE
498   sqlite3DbFree(db, pParse->apVtabLock);
499 #endif
500 
501   if( !IN_DECLARE_VTAB ){
502     /* If the pParse->declareVtab flag is set, do not delete any table
503     ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
504     ** will take responsibility for freeing the Table structure.
505     */
506     sqlite3DeleteTable(pParse->pNewTable);
507   }
508 
509   sqlite3DeleteTrigger(db, pParse->pNewTrigger);
510   sqlite3DbFree(db, pParse->apVarExpr);
511   sqlite3DbFree(db, pParse->aAlias);
512   while( pParse->pZombieTab ){
513     Table *p = pParse->pZombieTab;
514     pParse->pZombieTab = p->pNextZombie;
515     sqlite3DeleteTable(p);
516   }
517   if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
518     pParse->rc = SQLITE_ERROR;
519   }
520   return nErr;
521 }
522