1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** An tokenizer for SQL 13 ** 14 ** This file contains C code that splits an SQL input string up into 15 ** individual tokens and sends those tokens one-by-one over to the 16 ** parser for analysis. 17 ** 18 ** $Id: tokenize.c,v 1.107 2005/08/23 11:31:26 drh Exp $ 19 */ 20 #include "sqliteInt.h" 21 #include "os.h" 22 #include <ctype.h> 23 #include <stdlib.h> 24 25 /* 26 ** The sqlite3KeywordCode function looks up an identifier to determine if 27 ** it is a keyword. If it is a keyword, the token code of that keyword is 28 ** returned. If the input is not a keyword, TK_ID is returned. 29 ** 30 ** The implementation of this routine was generated by a program, 31 ** mkkeywordhash.h, located in the tool subdirectory of the distribution. 32 ** The output of the mkkeywordhash.c program is written into a file 33 ** named keywordhash.h and then included into this source file by 34 ** the #include below. 35 */ 36 #include "keywordhash.h" 37 38 39 /* 40 ** If X is a character that can be used in an identifier and 41 ** X&0x80==0 then sqlite3IsIdChar[X] will be 1. If X&0x80==0x80 then 42 ** X is always an identifier character. (Hence all UTF-8 43 ** characters can be part of an identifier). sqlite3IsIdChar[X] will 44 ** be 0 for every character in the lower 128 ASCII characters 45 ** that cannot be used as part of an identifier. 46 ** 47 ** In this implementation, an identifier can be a string of 48 ** alphabetic characters, digits, and "_" plus any character 49 ** with the high-order bit set. The latter rule means that 50 ** any sequence of UTF-8 characters or characters taken from 51 ** an extended ISO8859 character set can form an identifier. 52 ** 53 ** Ticket #1066. the SQL standard does not allow '$' in the 54 ** middle of identfiers. But many SQL implementations do. 55 ** SQLite will allow '$' in identifiers for compatibility. 56 ** But the feature is undocumented. 57 */ 58 const char sqlite3IsIdChar[] = { 59 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 60 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ 62 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ 63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ 64 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ 65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ 66 }; 67 68 #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20])) 69 70 /* 71 ** Return the length of the token that begins at z[0]. 72 ** Store the token type in *tokenType before returning. 73 */ 74 static int getToken(const unsigned char *z, int *tokenType){ 75 int i, c; 76 switch( *z ){ 77 case ' ': case '\t': case '\n': case '\f': case '\r': { 78 for(i=1; isspace(z[i]); i++){} 79 *tokenType = TK_SPACE; 80 return i; 81 } 82 case '-': { 83 if( z[1]=='-' ){ 84 for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 85 *tokenType = TK_COMMENT; 86 return i; 87 } 88 *tokenType = TK_MINUS; 89 return 1; 90 } 91 case '(': { 92 *tokenType = TK_LP; 93 return 1; 94 } 95 case ')': { 96 *tokenType = TK_RP; 97 return 1; 98 } 99 case ';': { 100 *tokenType = TK_SEMI; 101 return 1; 102 } 103 case '+': { 104 *tokenType = TK_PLUS; 105 return 1; 106 } 107 case '*': { 108 *tokenType = TK_STAR; 109 return 1; 110 } 111 case '/': { 112 if( z[1]!='*' || z[2]==0 ){ 113 *tokenType = TK_SLASH; 114 return 1; 115 } 116 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 117 if( c ) i++; 118 *tokenType = TK_COMMENT; 119 return i; 120 } 121 case '%': { 122 *tokenType = TK_REM; 123 return 1; 124 } 125 case '=': { 126 *tokenType = TK_EQ; 127 return 1 + (z[1]=='='); 128 } 129 case '<': { 130 if( (c=z[1])=='=' ){ 131 *tokenType = TK_LE; 132 return 2; 133 }else if( c=='>' ){ 134 *tokenType = TK_NE; 135 return 2; 136 }else if( c=='<' ){ 137 *tokenType = TK_LSHIFT; 138 return 2; 139 }else{ 140 *tokenType = TK_LT; 141 return 1; 142 } 143 } 144 case '>': { 145 if( (c=z[1])=='=' ){ 146 *tokenType = TK_GE; 147 return 2; 148 }else if( c=='>' ){ 149 *tokenType = TK_RSHIFT; 150 return 2; 151 }else{ 152 *tokenType = TK_GT; 153 return 1; 154 } 155 } 156 case '!': { 157 if( z[1]!='=' ){ 158 *tokenType = TK_ILLEGAL; 159 return 2; 160 }else{ 161 *tokenType = TK_NE; 162 return 2; 163 } 164 } 165 case '|': { 166 if( z[1]!='|' ){ 167 *tokenType = TK_BITOR; 168 return 1; 169 }else{ 170 *tokenType = TK_CONCAT; 171 return 2; 172 } 173 } 174 case ',': { 175 *tokenType = TK_COMMA; 176 return 1; 177 } 178 case '&': { 179 *tokenType = TK_BITAND; 180 return 1; 181 } 182 case '~': { 183 *tokenType = TK_BITNOT; 184 return 1; 185 } 186 case '`': 187 case '\'': 188 case '"': { 189 int delim = z[0]; 190 for(i=1; (c=z[i])!=0; i++){ 191 if( c==delim ){ 192 if( z[i+1]==delim ){ 193 i++; 194 }else{ 195 break; 196 } 197 } 198 } 199 if( c ) i++; 200 *tokenType = TK_STRING; 201 return i; 202 } 203 case '.': { 204 #ifndef SQLITE_OMIT_FLOATING_POINT 205 if( !isdigit(z[1]) ) 206 #endif 207 { 208 *tokenType = TK_DOT; 209 return 1; 210 } 211 /* If the next character is a digit, this is a floating point 212 ** number that begins with ".". Fall thru into the next case */ 213 } 214 case '0': case '1': case '2': case '3': case '4': 215 case '5': case '6': case '7': case '8': case '9': { 216 *tokenType = TK_INTEGER; 217 for(i=0; isdigit(z[i]); i++){} 218 #ifndef SQLITE_OMIT_FLOATING_POINT 219 if( z[i]=='.' ){ 220 i++; 221 while( isdigit(z[i]) ){ i++; } 222 *tokenType = TK_FLOAT; 223 } 224 if( (z[i]=='e' || z[i]=='E') && 225 ( isdigit(z[i+1]) 226 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) 227 ) 228 ){ 229 i += 2; 230 while( isdigit(z[i]) ){ i++; } 231 *tokenType = TK_FLOAT; 232 } 233 #endif 234 return i; 235 } 236 case '[': { 237 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 238 *tokenType = TK_ID; 239 return i; 240 } 241 case '?': { 242 *tokenType = TK_VARIABLE; 243 for(i=1; isdigit(z[i]); i++){} 244 return i; 245 } 246 case '#': { 247 for(i=1; isdigit(z[i]); i++){} 248 if( i>1 ){ 249 /* Parameters of the form #NNN (where NNN is a number) are used 250 ** internally by sqlite3NestedParse. */ 251 *tokenType = TK_REGISTER; 252 return i; 253 } 254 /* Fall through into the next case if the '#' is not followed by 255 ** a digit. Try to match #AAAA where AAAA is a parameter name. */ 256 } 257 #ifndef SQLITE_OMIT_TCL_VARIABLE 258 case '$': 259 #endif 260 case ':': { 261 int n = 0; 262 *tokenType = TK_VARIABLE; 263 for(i=1; (c=z[i])!=0; i++){ 264 if( IdChar(c) ){ 265 n++; 266 #ifndef SQLITE_OMIT_TCL_VARIABLE 267 }else if( c=='(' && n>0 ){ 268 do{ 269 i++; 270 }while( (c=z[i])!=0 && !isspace(c) && c!=')' ); 271 if( c==')' ){ 272 i++; 273 }else{ 274 *tokenType = TK_ILLEGAL; 275 } 276 break; 277 }else if( c==':' && z[i+1]==':' ){ 278 i++; 279 #endif 280 }else{ 281 break; 282 } 283 } 284 if( n==0 ) *tokenType = TK_ILLEGAL; 285 return i; 286 } 287 #ifndef SQLITE_OMIT_BLOB_LITERAL 288 case 'x': case 'X': { 289 if( (c=z[1])=='\'' || c=='"' ){ 290 int delim = c; 291 *tokenType = TK_BLOB; 292 for(i=2; (c=z[i])!=0; i++){ 293 if( c==delim ){ 294 if( i%2 ) *tokenType = TK_ILLEGAL; 295 break; 296 } 297 if( !isxdigit(c) ){ 298 *tokenType = TK_ILLEGAL; 299 return i; 300 } 301 } 302 if( c ) i++; 303 return i; 304 } 305 /* Otherwise fall through to the next case */ 306 } 307 #endif 308 default: { 309 if( !IdChar(*z) ){ 310 break; 311 } 312 for(i=1; IdChar(z[i]); i++){} 313 *tokenType = keywordCode((char*)z, i); 314 return i; 315 } 316 } 317 *tokenType = TK_ILLEGAL; 318 return 1; 319 } 320 int sqlite3GetToken(const unsigned char *z, int *tokenType){ 321 return getToken(z, tokenType); 322 } 323 324 /* 325 ** Run the parser on the given SQL string. The parser structure is 326 ** passed in. An SQLITE_ status code is returned. If an error occurs 327 ** and pzErrMsg!=NULL then an error message might be written into 328 ** memory obtained from malloc() and *pzErrMsg made to point to that 329 ** error message. Or maybe not. 330 */ 331 int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 332 int nErr = 0; 333 int i; 334 void *pEngine; 335 int tokenType; 336 int lastTokenParsed = -1; 337 sqlite3 *db = pParse->db; 338 extern void *sqlite3ParserAlloc(void*(*)(int)); 339 extern void sqlite3ParserFree(void*, void(*)(void*)); 340 extern int sqlite3Parser(void*, int, Token, Parse*); 341 342 db->flags &= ~SQLITE_Interrupt; 343 pParse->rc = SQLITE_OK; 344 i = 0; 345 pEngine = sqlite3ParserAlloc((void*(*)(int))sqlite3MallocX); 346 if( pEngine==0 ){ 347 sqlite3SetString(pzErrMsg, "out of memory", (char*)0); 348 return SQLITE_NOMEM; 349 } 350 assert( pParse->sLastToken.dyn==0 ); 351 assert( pParse->pNewTable==0 ); 352 assert( pParse->pNewTrigger==0 ); 353 assert( pParse->nVar==0 ); 354 assert( pParse->nVarExpr==0 ); 355 assert( pParse->nVarExprAlloc==0 ); 356 assert( pParse->apVarExpr==0 ); 357 pParse->zTail = pParse->zSql = zSql; 358 while( sqlite3_malloc_failed==0 && zSql[i]!=0 ){ 359 assert( i>=0 ); 360 pParse->sLastToken.z = &zSql[i]; 361 assert( pParse->sLastToken.dyn==0 ); 362 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType); 363 i += pParse->sLastToken.n; 364 switch( tokenType ){ 365 case TK_SPACE: 366 case TK_COMMENT: { 367 if( (db->flags & SQLITE_Interrupt)!=0 ){ 368 pParse->rc = SQLITE_INTERRUPT; 369 sqlite3SetString(pzErrMsg, "interrupt", (char*)0); 370 goto abort_parse; 371 } 372 break; 373 } 374 case TK_ILLEGAL: { 375 if( pzErrMsg ){ 376 sqliteFree(*pzErrMsg); 377 *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"", 378 &pParse->sLastToken); 379 } 380 nErr++; 381 goto abort_parse; 382 } 383 case TK_SEMI: { 384 pParse->zTail = &zSql[i]; 385 /* Fall thru into the default case */ 386 } 387 default: { 388 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); 389 lastTokenParsed = tokenType; 390 if( pParse->rc!=SQLITE_OK ){ 391 goto abort_parse; 392 } 393 break; 394 } 395 } 396 } 397 abort_parse: 398 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 399 if( lastTokenParsed!=TK_SEMI ){ 400 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 401 pParse->zTail = &zSql[i]; 402 } 403 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 404 } 405 sqlite3ParserFree(pEngine, sqlite3FreeX); 406 if( sqlite3_malloc_failed ){ 407 pParse->rc = SQLITE_NOMEM; 408 } 409 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 410 sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), 411 (char*)0); 412 } 413 if( pParse->zErrMsg ){ 414 if( pzErrMsg && *pzErrMsg==0 ){ 415 *pzErrMsg = pParse->zErrMsg; 416 }else{ 417 sqliteFree(pParse->zErrMsg); 418 } 419 pParse->zErrMsg = 0; 420 if( !nErr ) nErr++; 421 } 422 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ 423 sqlite3VdbeDelete(pParse->pVdbe); 424 pParse->pVdbe = 0; 425 } 426 sqlite3DeleteTable(pParse->db, pParse->pNewTable); 427 sqlite3DeleteTrigger(pParse->pNewTrigger); 428 sqliteFree(pParse->apVarExpr); 429 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){ 430 pParse->rc = SQLITE_ERROR; 431 } 432 return nErr; 433 } 434