xref: /sqlite-3.40.0/src/tokenize.c (revision 5d00d0a8)
1 /*
2 ** 2001 September 15
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
** A tokenizer for SQL
13 **
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
17 **
18 ** $Id: tokenize.c,v 1.163 2009/07/03 22:54:37 drh Exp $
19 */
20 #include "sqliteInt.h"
21 #include <stdlib.h>
22 
/*
** The charMap() macro maps alphabetic characters into their
** lower-case ASCII equivalent.  On ASCII machines, this is just
** an upper-to-lower case map.  On EBCDIC machines we also need
** to adjust the encoding.  Only alphabetic characters and underscores
** need to be translated.
**
** The argument is cast to unsigned char before it is used as a table
** index, so a negative (signed) char value cannot index before the start
** of the table.  The macro argument is parenthesized so that the cast
** applies to the whole argument expression, not just its first operand.
**
** In the EBCDIC table below, both the lower-case (0x81..0xA9) and the
** upper-case (0xC1..0xE9) letter code points map to ASCII 'a'..'z'
** (97..122), code point 0x6D maps to ASCII '_' (95), and every other
** code point maps to 0.
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
const unsigned char ebcdicToAscii[] = {
/* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
};
#endif
55 
56 /*
57 ** The sqlite3KeywordCode function looks up an identifier to determine if
58 ** it is a keyword.  If it is a keyword, the token code of that keyword is
59 ** returned.  If the input is not a keyword, TK_ID is returned.
60 **
61 ** The implementation of this routine was generated by a program,
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
63 ** The output of the mkkeywordhash.c program is written into a file
64 ** named keywordhash.h and then included into this source file by
65 ** the #include below.
66 */
67 #include "keywordhash.h"
68 
69 
/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true.  Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier.  For 7-bit characters, X must be
** at least 0x20 and sqlite3IsAsciiIdChar[X-0x20] must be 1.  The
** table therefore covers only code points 0x20 through 0x7f.
**
** For EBCDIC, the rules are more complex but have the same
** end result: X must be at least 0x42 and
** sqlite3IsEbcdicIdChar[X-0x40] must be 1.
**
** IMPORTANT: IdChar() stores its argument into a variable named "c",
** which must be declared (with an integer type) in the scope where the
** macro is used.  The argument expression itself is evaluated exactly
** once, and is parenthesized so that any expression may be passed.
**
** The lookup tables are not declared static.
** NOTE(review): presumably other translation units reference them by
** name - confirm before changing their linkage.
**
** Ticket #1066.  The SQL standard does not allow '$' in the
** middle of identifiers.  But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
*/
#ifdef SQLITE_ASCII
const char sqlite3IsAsciiIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
};
#define IdChar(C)  (((c=(C))&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20]))
#endif
#ifdef SQLITE_EBCDIC
const char sqlite3IsEbcdicIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
    1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
    0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
};
#define IdChar(C)  (((c=(C))>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
#endif
116 
117 
118 /*
119 ** Return the length of the token that begins at z[0].
120 ** Store the token type in *tokenType before returning.
121 */
122 int sqlite3GetToken(const unsigned char *z, int *tokenType){
123   int i, c;
124   switch( *z ){
125     case ' ': case '\t': case '\n': case '\f': case '\r': {
126       testcase( z[0]==' ' );
127       testcase( z[0]=='\t' );
128       testcase( z[0]=='\n' );
129       testcase( z[0]=='\f' );
130       testcase( z[0]=='\r' );
131       for(i=1; sqlite3Isspace(z[i]); i++){}
132       *tokenType = TK_SPACE;
133       return i;
134     }
135     case '-': {
136       if( z[1]=='-' ){
137         for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
138         *tokenType = TK_SPACE;
139         return i;
140       }
141       *tokenType = TK_MINUS;
142       return 1;
143     }
144     case '(': {
145       *tokenType = TK_LP;
146       return 1;
147     }
148     case ')': {
149       *tokenType = TK_RP;
150       return 1;
151     }
152     case ';': {
153       *tokenType = TK_SEMI;
154       return 1;
155     }
156     case '+': {
157       *tokenType = TK_PLUS;
158       return 1;
159     }
160     case '*': {
161       *tokenType = TK_STAR;
162       return 1;
163     }
164     case '/': {
165       if( z[1]!='*' || z[2]==0 ){
166         *tokenType = TK_SLASH;
167         return 1;
168       }
169       for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
170       if( c ) i++;
171       *tokenType = TK_SPACE;
172       return i;
173     }
174     case '%': {
175       *tokenType = TK_REM;
176       return 1;
177     }
178     case '=': {
179       *tokenType = TK_EQ;
180       return 1 + (z[1]=='=');
181     }
182     case '<': {
183       if( (c=z[1])=='=' ){
184         *tokenType = TK_LE;
185         return 2;
186       }else if( c=='>' ){
187         *tokenType = TK_NE;
188         return 2;
189       }else if( c=='<' ){
190         *tokenType = TK_LSHIFT;
191         return 2;
192       }else{
193         *tokenType = TK_LT;
194         return 1;
195       }
196     }
197     case '>': {
198       if( (c=z[1])=='=' ){
199         *tokenType = TK_GE;
200         return 2;
201       }else if( c=='>' ){
202         *tokenType = TK_RSHIFT;
203         return 2;
204       }else{
205         *tokenType = TK_GT;
206         return 1;
207       }
208     }
209     case '!': {
210       if( z[1]!='=' ){
211         *tokenType = TK_ILLEGAL;
212         return 2;
213       }else{
214         *tokenType = TK_NE;
215         return 2;
216       }
217     }
218     case '|': {
219       if( z[1]!='|' ){
220         *tokenType = TK_BITOR;
221         return 1;
222       }else{
223         *tokenType = TK_CONCAT;
224         return 2;
225       }
226     }
227     case ',': {
228       *tokenType = TK_COMMA;
229       return 1;
230     }
231     case '&': {
232       *tokenType = TK_BITAND;
233       return 1;
234     }
235     case '~': {
236       *tokenType = TK_BITNOT;
237       return 1;
238     }
239     case '`':
240     case '\'':
241     case '"': {
242       int delim = z[0];
243       testcase( delim=='`' );
244       testcase( delim=='\'' );
245       testcase( delim=='"' );
246       for(i=1; (c=z[i])!=0; i++){
247         if( c==delim ){
248           if( z[i+1]==delim ){
249             i++;
250           }else{
251             break;
252           }
253         }
254       }
255       if( c=='\'' ){
256         *tokenType = TK_STRING;
257         return i+1;
258       }else if( c!=0 ){
259         *tokenType = TK_ID;
260         return i+1;
261       }else{
262         *tokenType = TK_ILLEGAL;
263         return i;
264       }
265     }
266     case '.': {
267 #ifndef SQLITE_OMIT_FLOATING_POINT
268       if( !sqlite3Isdigit(z[1]) )
269 #endif
270       {
271         *tokenType = TK_DOT;
272         return 1;
273       }
274       /* If the next character is a digit, this is a floating point
275       ** number that begins with ".".  Fall thru into the next case */
276     }
277     case '0': case '1': case '2': case '3': case '4':
278     case '5': case '6': case '7': case '8': case '9': {
279       testcase( z[0]=='0' );  testcase( z[0]=='1' );  testcase( z[0]=='2' );
280       testcase( z[0]=='3' );  testcase( z[0]=='4' );  testcase( z[0]=='5' );
281       testcase( z[0]=='6' );  testcase( z[0]=='7' );  testcase( z[0]=='8' );
282       testcase( z[0]=='9' );
283       *tokenType = TK_INTEGER;
284       for(i=0; sqlite3Isdigit(z[i]); i++){}
285 #ifndef SQLITE_OMIT_FLOATING_POINT
286       if( z[i]=='.' ){
287         i++;
288         while( sqlite3Isdigit(z[i]) ){ i++; }
289         *tokenType = TK_FLOAT;
290       }
291       if( (z[i]=='e' || z[i]=='E') &&
292            ( sqlite3Isdigit(z[i+1])
293             || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2]))
294            )
295       ){
296         i += 2;
297         while( sqlite3Isdigit(z[i]) ){ i++; }
298         *tokenType = TK_FLOAT;
299       }
300 #endif
301       while( IdChar(z[i]) ){
302         *tokenType = TK_ILLEGAL;
303         i++;
304       }
305       return i;
306     }
307     case '[': {
308       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
309       *tokenType = c==']' ? TK_ID : TK_ILLEGAL;
310       return i;
311     }
312     case '?': {
313       *tokenType = TK_VARIABLE;
314       for(i=1; sqlite3Isdigit(z[i]); i++){}
315       return i;
316     }
317     case '#': {
318       for(i=1; sqlite3Isdigit(z[i]); i++){}
319       if( i>1 ){
320         /* Parameters of the form #NNN (where NNN is a number) are used
321         ** internally by sqlite3NestedParse.  */
322         *tokenType = TK_REGISTER;
323         return i;
324       }
325       /* Fall through into the next case if the '#' is not followed by
326       ** a digit. Try to match #AAAA where AAAA is a parameter name. */
327     }
328 #ifndef SQLITE_OMIT_TCL_VARIABLE
329     case '$':
330 #endif
331     case '@':  /* For compatibility with MS SQL Server */
332     case ':': {
333       int n = 0;
334       testcase( z[0]=='$' );  testcase( z[0]=='@' );  testcase( z[0]==':' );
335       *tokenType = TK_VARIABLE;
336       for(i=1; (c=z[i])!=0; i++){
337         if( IdChar(c) ){
338           n++;
339 #ifndef SQLITE_OMIT_TCL_VARIABLE
340         }else if( c=='(' && n>0 ){
341           do{
342             i++;
343           }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' );
344           if( c==')' ){
345             i++;
346           }else{
347             *tokenType = TK_ILLEGAL;
348           }
349           break;
350         }else if( c==':' && z[i+1]==':' ){
351           i++;
352 #endif
353         }else{
354           break;
355         }
356       }
357       if( n==0 ) *tokenType = TK_ILLEGAL;
358       return i;
359     }
360 #ifndef SQLITE_OMIT_BLOB_LITERAL
361     case 'x': case 'X': {
362       testcase( z[0]=='x' ); testcase( z[0]=='X' );
363       if( z[1]=='\'' ){
364         *tokenType = TK_BLOB;
365         for(i=2; (c=z[i])!=0 && c!='\''; i++){
366           if( !sqlite3Isxdigit(c) ){
367             *tokenType = TK_ILLEGAL;
368           }
369         }
370         if( i%2 || !c ) *tokenType = TK_ILLEGAL;
371         if( c ) i++;
372         return i;
373       }
374       /* Otherwise fall through to the next case */
375     }
376 #endif
377     default: {
378       if( !IdChar(*z) ){
379         break;
380       }
381       for(i=1; IdChar(z[i]); i++){}
382       *tokenType = keywordCode((char*)z, i);
383       return i;
384     }
385   }
386   *tokenType = TK_ILLEGAL;
387   return 1;
388 }
389 
390 /*
391 ** Run the parser on the given SQL string.  The parser structure is
392 ** passed in.  An SQLITE_ status code is returned.  If an error occurs
393 ** then an and attempt is made to write an error message into
394 ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that
395 ** error message.
396 */
397 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
398   int nErr = 0;                   /* Number of errors encountered */
399   int i;                          /* Loop counter */
400   void *pEngine;                  /* The LEMON-generated LALR(1) parser */
401   int tokenType;                  /* type of the next token */
402   int lastTokenParsed = -1;       /* type of the previous token */
403   u8 enableLookaside;             /* Saved value of db->lookaside.bEnabled */
404   sqlite3 *db = pParse->db;       /* The database connection */
405   int mxSqlLen;                   /* Max length of an SQL string */
406 
407 
408   mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH];
409   if( db->activeVdbeCnt==0 ){
410     db->u1.isInterrupted = 0;
411   }
412   pParse->rc = SQLITE_OK;
413   pParse->zTail = zSql;
414   i = 0;
415   assert( pzErrMsg!=0 );
416   pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc);
417   if( pEngine==0 ){
418     db->mallocFailed = 1;
419     return SQLITE_NOMEM;
420   }
421   assert( pParse->pNewTable==0 );
422   assert( pParse->pNewTrigger==0 );
423   assert( pParse->nVar==0 );
424   assert( pParse->nVarExpr==0 );
425   assert( pParse->nVarExprAlloc==0 );
426   assert( pParse->apVarExpr==0 );
427   enableLookaside = db->lookaside.bEnabled;
428   if( db->lookaside.pStart ) db->lookaside.bEnabled = 1;
429   while( !db->mallocFailed && zSql[i]!=0 ){
430     assert( i>=0 );
431     pParse->sLastToken.z = &zSql[i];
432     pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType);
433     i += pParse->sLastToken.n;
434     if( i>mxSqlLen ){
435       pParse->rc = SQLITE_TOOBIG;
436       break;
437     }
438     switch( tokenType ){
439       case TK_SPACE: {
440         if( db->u1.isInterrupted ){
441           sqlite3ErrorMsg(pParse, "interrupt");
442           pParse->rc = SQLITE_INTERRUPT;
443           goto abort_parse;
444         }
445         break;
446       }
447       case TK_ILLEGAL: {
448         sqlite3DbFree(db, *pzErrMsg);
449         *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"",
450                         &pParse->sLastToken);
451         nErr++;
452         goto abort_parse;
453       }
454       case TK_SEMI: {
455         pParse->zTail = &zSql[i];
456         /* Fall thru into the default case */
457       }
458       default: {
459         sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
460         lastTokenParsed = tokenType;
461         if( pParse->rc!=SQLITE_OK ){
462           goto abort_parse;
463         }
464         break;
465       }
466     }
467   }
468 abort_parse:
469   if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
470     if( lastTokenParsed!=TK_SEMI ){
471       sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
472       pParse->zTail = &zSql[i];
473     }
474     sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
475   }
476 #ifdef YYTRACKMAXSTACKDEPTH
477   sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK,
478       sqlite3ParserStackPeak(pEngine)
479   );
480 #endif /* YYDEBUG */
481   sqlite3ParserFree(pEngine, sqlite3_free);
482   db->lookaside.bEnabled = enableLookaside;
483   if( db->mallocFailed ){
484     pParse->rc = SQLITE_NOMEM;
485   }
486   if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
487     sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc));
488   }
489   assert( pzErrMsg!=0 );
490   if( pParse->zErrMsg ){
491     *pzErrMsg = pParse->zErrMsg;
492     pParse->zErrMsg = 0;
493     nErr++;
494   }
495   if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
496     sqlite3VdbeDelete(pParse->pVdbe);
497     pParse->pVdbe = 0;
498   }
499 #ifndef SQLITE_OMIT_SHARED_CACHE
500   if( pParse->nested==0 ){
501     sqlite3DbFree(db, pParse->aTableLock);
502     pParse->aTableLock = 0;
503     pParse->nTableLock = 0;
504   }
505 #endif
506 #ifndef SQLITE_OMIT_VIRTUALTABLE
507   sqlite3DbFree(db, pParse->apVtabLock);
508 #endif
509 
510   if( !IN_DECLARE_VTAB ){
511     /* If the pParse->declareVtab flag is set, do not delete any table
512     ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
513     ** will take responsibility for freeing the Table structure.
514     */
515     sqlite3DeleteTable(pParse->pNewTable);
516   }
517 
518   sqlite3DeleteTrigger(db, pParse->pNewTrigger);
519   sqlite3DbFree(db, pParse->apVarExpr);
520   sqlite3DbFree(db, pParse->aAlias);
521   while( pParse->pAinc ){
522     AutoincInfo *p = pParse->pAinc;
523     pParse->pAinc = p->pNext;
524     sqlite3DbFree(db, p);
525   }
526   while( pParse->pZombieTab ){
527     Table *p = pParse->pZombieTab;
528     pParse->pZombieTab = p->pNextZombie;
529     sqlite3DeleteTable(p);
530   }
531   if( nErr>0 && pParse->rc==SQLITE_OK ){
532     pParse->rc = SQLITE_ERROR;
533   }
534   return nErr;
535 }
536