1 /* 2 ** Copyright (c) 1999, 2000 D. Richard Hipp 3 ** 4 ** This program is free software; you can redistribute it and/or 5 ** modify it under the terms of the GNU General Public 6 ** License as published by the Free Software Foundation; either 7 ** version 2 of the License, or (at your option) any later version. 8 ** 9 ** This program is distributed in the hope that it will be useful, 10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of 11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 ** General Public License for more details. 13 ** 14 ** You should have received a copy of the GNU General Public 15 ** License along with this library; if not, write to the 16 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330, 17 ** Boston, MA 02111-1307, USA. 18 ** 19 ** Author contact information: 20 ** [email protected] 21 ** http://www.hwaci.com/drh/ 22 ** 23 ************************************************************************* 24 ** An tokenizer for SQL 25 ** 26 ** This file contains C code that splits an SQL input string up into 27 ** individual tokens and sends those tokens one-by-one over to the 28 ** parser for analysis. 29 ** 30 ** $Id: tokenize.c,v 1.13 2000/08/09 17:17:25 drh Exp $ 31 */ 32 #include "sqliteInt.h" 33 #include <ctype.h> 34 #include <stdlib.h> 35 36 /* 37 ** All the keywords of the SQL language are stored as in a hash 38 ** table composed of instances of the following structure. 39 */ 40 typedef struct Keyword Keyword; 41 struct Keyword { 42 char *zName; /* The keyword name */ 43 int len; /* Number of characters in the keyword */ 44 int tokenType; /* The token value for this keyword */ 45 Keyword *pNext; /* Next keyword with the same hash */ 46 }; 47 48 /* 49 ** These are the keywords 50 */ 51 static Keyword aKeywordTable[] = { 52 { "ALL", 0, TK_ALL, 0 }, 53 { "AND", 0, TK_AND, 0 }, 54 { "AS", 0, TK_AS, 0 }, 55 { "ASC", 0, TK_ASC, 0 }, 56 { "BETWEEN", 0, TK_BETWEEN, 0 }, 57 { "BY", 0, TK_BY, 0 }, 58 { "CHECK", 0, TK_CHECK, 0 }, 59 { "CONSTRAINT", 0, TK_CONSTRAINT, 0 }, 60 { "COPY", 0, TK_COPY, 0 }, 61 { "CREATE", 0, TK_CREATE, 0 }, 62 { "DEFAULT", 0, TK_DEFAULT, 0 }, 63 { "DELETE", 0, TK_DELETE, 0 }, 64 { "DELIMITERS", 0, TK_DELIMITERS, 0 }, 65 { "DESC", 0, TK_DESC, 0 }, 66 { "DISTINCT", 0, TK_DISTINCT, 0 }, 67 { "DROP", 0, TK_DROP, 0 }, 68 { "EXCEPT", 0, TK_EXCEPT, 0 }, 69 { "EXPLAIN", 0, TK_EXPLAIN, 0 }, 70 { "FROM", 0, TK_FROM, 0 }, 71 { "GLOB", 0, TK_GLOB, 0 }, 72 { "GROUP", 0, TK_GROUP, 0 }, 73 { "HAVING", 0, TK_HAVING, 0 }, 74 { "IN", 0, TK_IN, 0 }, 75 { "INDEX", 0, TK_INDEX, 0 }, 76 { "INSERT", 0, TK_INSERT, 0 }, 77 { "INTERSECT", 0, TK_INTERSECT, 0 }, 78 { "INTO", 0, TK_INTO, 0 }, 79 { "IS", 0, TK_IS, 0 }, 80 { "ISNULL", 0, TK_ISNULL, 0 }, 81 { "KEY", 0, TK_KEY, 0 }, 82 { "LIKE", 0, TK_LIKE, 0 }, 83 { "NOT", 0, TK_NOT, 0 }, 84 { "NOTNULL", 0, TK_NOTNULL, 0 }, 85 { "NULL", 0, TK_NULL, 0 }, 86 { "ON", 0, TK_ON, 0 }, 87 { "OR", 0, TK_OR, 0 }, 88 { "ORDER", 0, TK_ORDER, 0 }, 89 { "PRIMARY", 0, TK_PRIMARY, 0 }, 90 { "SELECT", 0, TK_SELECT, 0 }, 91 { "SET", 0, TK_SET, 0 }, 92 { "TABLE", 0, TK_TABLE, 0 }, 93 { "UNION", 0, TK_UNION, 0 }, 94 { "UNIQUE", 0, TK_UNIQUE, 0 }, 95 { "UPDATE", 0, TK_UPDATE, 0 }, 96 { "USING", 0, TK_USING, 0 }, 97 { "VACUUM", 0, TK_VACUUM, 0 }, 98 { "VALUES", 0, TK_VALUES, 0 }, 99 { "WHERE", 0, TK_WHERE, 0 }, 100 }; 101 102 /* 103 ** This is the hash table 104 */ 105 #define KEY_HASH_SIZE 37 106 static Keyword *apHashTable[KEY_HASH_SIZE]; 107 108 109 /* 110 ** This function looks up an identifier to determine if it is a 111 ** keyword. If it is a keyword, the token code of that keyword is 112 ** returned. If the input is not a keyword, TK_ID is returned. 113 */ 114 static int sqliteKeywordCode(const char *z, int n){ 115 int h; 116 Keyword *p; 117 if( aKeywordTable[0].len==0 ){ 118 /* Initialize the keyword hash table */ 119 int i; 120 int n; 121 n = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); 122 for(i=0; i<n; i++){ 123 aKeywordTable[i].len = strlen(aKeywordTable[i].zName); 124 h = sqliteHashNoCase(aKeywordTable[i].zName, aKeywordTable[i].len); 125 h %= KEY_HASH_SIZE; 126 aKeywordTable[i].pNext = apHashTable[h]; 127 apHashTable[h] = &aKeywordTable[i]; 128 } 129 } 130 h = sqliteHashNoCase(z, n) % KEY_HASH_SIZE; 131 for(p=apHashTable[h]; p; p=p->pNext){ 132 if( p->len==n && sqliteStrNICmp(p->zName, z, n)==0 ){ 133 return p->tokenType; 134 } 135 } 136 return TK_ID; 137 } 138 139 /* 140 ** Return the length of the token that begins at z[0]. Return 141 ** -1 if the token is (or might be) incomplete. Store the token 142 ** type in *tokenType before returning. 143 */ 144 int sqliteGetToken(const char *z, int *tokenType){ 145 int i; 146 switch( *z ){ 147 case ' ': case '\t': case '\n': case '\f': case '\r': { 148 for(i=1; z[i] && isspace(z[i]); i++){} 149 *tokenType = TK_SPACE; 150 return i; 151 } 152 case '-': { 153 if( z[1]==0 ) return -1; 154 if( z[1]=='-' ){ 155 for(i=2; z[i] && z[i]!='\n'; i++){} 156 *tokenType = TK_COMMENT; 157 return i; 158 } 159 *tokenType = TK_MINUS; 160 return 1; 161 } 162 case '(': { 163 *tokenType = TK_LP; 164 return 1; 165 } 166 case ')': { 167 *tokenType = TK_RP; 168 return 1; 169 } 170 case ';': { 171 *tokenType = TK_SEMI; 172 return 1; 173 } 174 case '+': { 175 *tokenType = TK_PLUS; 176 return 1; 177 } 178 case '*': { 179 *tokenType = TK_STAR; 180 return 1; 181 } 182 case '/': { 183 *tokenType = TK_SLASH; 184 return 1; 185 } 186 case '=': { 187 *tokenType = TK_EQ; 188 return 1 + (z[1]=='='); 189 } 190 case '<': { 191 if( z[1]=='=' ){ 192 *tokenType = TK_LE; 193 return 2; 194 }else if( z[1]=='>' ){ 195 *tokenType = TK_NE; 196 return 2; 197 }else{ 198 *tokenType = TK_LT; 199 return 1; 200 } 201 } 202 case '>': { 203 if( z[1]=='=' ){ 204 *tokenType = TK_GE; 205 return 2; 206 }else{ 207 *tokenType = TK_GT; 208 return 1; 209 } 210 } 211 case '!': { 212 if( z[1]!='=' ){ 213 *tokenType = TK_ILLEGAL; 214 return 2; 215 }else{ 216 *tokenType = TK_NE; 217 return 2; 218 } 219 } 220 case '|': { 221 if( z[1]!='|' ){ 222 *tokenType = TK_ILLEGAL; 223 return 1; 224 }else{ 225 *tokenType = TK_CONCAT; 226 return 2; 227 } 228 } 229 case ',': { 230 *tokenType = TK_COMMA; 231 return 1; 232 } 233 case '\'': case '"': { 234 int delim = z[0]; 235 for(i=1; z[i]; i++){ 236 if( z[i]==delim ){ 237 if( z[i+1]==delim ){ 238 i++; 239 }else{ 240 break; 241 } 242 } 243 } 244 if( z[i] ) i++; 245 *tokenType = TK_STRING; 246 return i; 247 } 248 case '.': { 249 if( !isdigit(z[1]) ){ 250 *tokenType = TK_DOT; 251 return 1; 252 } 253 /* Fall thru into the next case */ 254 } 255 case '0': case '1': case '2': case '3': case '4': 256 case '5': case '6': case '7': case '8': case '9': { 257 *tokenType = TK_INTEGER; 258 for(i=1; z[i] && isdigit(z[i]); i++){} 259 if( z[i]=='.' ){ 260 i++; 261 while( z[i] && isdigit(z[i]) ){ i++; } 262 *tokenType = TK_FLOAT; 263 } 264 if( (z[i]=='e' || z[i]=='E') && 265 ( isdigit(z[i+1]) 266 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2])) 267 ) 268 ){ 269 i += 2; 270 while( z[i] && isdigit(z[i]) ){ i++; } 271 *tokenType = TK_FLOAT; 272 }else if( z[0]=='.' ){ 273 *tokenType = TK_FLOAT; 274 } 275 return i; 276 } 277 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 278 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 279 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 280 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 281 case 'y': case 'z': case '_': 282 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 283 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 284 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 285 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 286 case 'Y': case 'Z': { 287 for(i=1; z[i] && (isalnum(z[i]) || z[i]=='_'); i++){} 288 *tokenType = sqliteKeywordCode(z, i); 289 return i; 290 } 291 default: { 292 break; 293 } 294 } 295 *tokenType = TK_ILLEGAL; 296 return 1; 297 } 298 299 /* 300 ** Run the parser on the given SQL string. The parser structure is 301 ** passed in. Return the number of errors. 302 */ 303 int sqliteRunParser(Parse *pParse, char *zSql, char **pzErrMsg){ 304 int nErr = 0; 305 int i; 306 void *pEngine; 307 int once = 1; 308 static FILE *trace = 0; 309 extern void *sqliteParserAlloc(void*(*)(int)); 310 extern void sqliteParserFree(void*, void(*)(void*)); 311 extern int sqliteParser(void*, int, ...); 312 extern void sqliteParserTrace(FILE*, char *); 313 314 i = 0; 315 sqliteParseInfoReset(pParse); 316 pEngine = sqliteParserAlloc((void*(*)(int))malloc); 317 if( pEngine==0 ){ 318 sqliteSetString(pzErrMsg, "out of memory", 0); 319 return 1; 320 } 321 sqliteParserTrace(trace, "parser: "); 322 while( nErr==0 && i>=0 && zSql[i]!=0 ){ 323 int tokenType; 324 325 pParse->sLastToken.z = &zSql[i]; 326 pParse->sLastToken.n = sqliteGetToken(&zSql[i], &tokenType); 327 i += pParse->sLastToken.n; 328 if( once ){ 329 pParse->sFirstToken = pParse->sLastToken; 330 once = 0; 331 } 332 switch( tokenType ){ 333 case TK_SPACE: 334 break; 335 case TK_COMMENT: { 336 /* Various debugging modes can be turned on and off using 337 ** special SQL comments. Check for the special comments 338 ** here and take approriate action if found. 339 */ 340 #ifndef NDEBUG 341 char *z = pParse->sLastToken.z; 342 if( sqliteStrNICmp(z,"--parser-trace-on--",19)==0 ){ 343 trace = stderr; 344 sqliteParserTrace(trace, "parser: "); 345 }else if( sqliteStrNICmp(z,"--parser-trace-off--", 20)==0 ){ 346 trace = 0; 347 sqliteParserTrace(trace, "parser: "); 348 }else if( sqliteStrNICmp(z,"--vdbe-trace-on--",17)==0 ){ 349 pParse->db->flags |= SQLITE_VdbeTrace; 350 }else if( sqliteStrNICmp(z,"--vdbe-trace-off--", 18)==0 ){ 351 pParse->db->flags &= ~SQLITE_VdbeTrace; 352 #ifdef MEMORY_DEBUG 353 }else if( sqliteStrNICmp(z,"--malloc-fail=",14)==0 ){ 354 sqlite_iMallocFail = atoi(&z[14]); 355 }else if( sqliteStrNICmp(z,"--malloc-stats--", 16)==0 ){ 356 if( pParse->xCallback ){ 357 static char *azName[4] = {"malloc", "free", "to_fail", 0 }; 358 char *azArg[4]; 359 char zVal[3][30]; 360 sprintf(zVal[0],"%d", sqlite_nMalloc); 361 sprintf(zVal[1],"%d", sqlite_nFree); 362 sprintf(zVal[2],"%d", sqlite_iMallocFail); 363 azArg[0] = zVal[0]; 364 azArg[1] = zVal[1]; 365 azArg[2] = zVal[2]; 366 azArg[3] = 0; 367 pParse->xCallback(pParse->pArg, 3, azArg, azName); 368 } 369 #endif 370 } 371 #endif 372 break; 373 } 374 case TK_ILLEGAL: 375 sqliteSetNString(pzErrMsg, "unrecognized token: \"", -1, 376 pParse->sLastToken.z, pParse->sLastToken.n, "\"", 1, 0); 377 nErr++; 378 break; 379 default: 380 sqliteParser(pEngine, tokenType, pParse->sLastToken, pParse); 381 if( pParse->zErrMsg && pParse->sErrToken.z ){ 382 sqliteSetNString(pzErrMsg, "near \"", -1, 383 pParse->sErrToken.z, pParse->sErrToken.n, 384 "\": ", -1, 385 pParse->zErrMsg, -1, 386 0); 387 nErr++; 388 sqliteFree(pParse->zErrMsg); 389 pParse->zErrMsg = 0; 390 } 391 break; 392 } 393 } 394 if( nErr==0 ){ 395 sqliteParser(pEngine, 0, pParse->sLastToken, pParse); 396 if( pParse->zErrMsg && pParse->sErrToken.z ){ 397 sqliteSetNString(pzErrMsg, "near \"", -1, 398 pParse->sErrToken.z, pParse->sErrToken.n, 399 "\": ", -1, 400 pParse->zErrMsg, -1, 401 0); 402 nErr++; 403 sqliteFree(pParse->zErrMsg); 404 pParse->zErrMsg = 0; 405 } 406 } 407 sqliteParserFree(pEngine, free); 408 if( pParse->zErrMsg ){ 409 if( pzErrMsg ){ 410 sqliteFree(*pzErrMsg); 411 *pzErrMsg = pParse->zErrMsg; 412 }else{ 413 sqliteFree(pParse->zErrMsg); 414 } 415 if( !nErr ) nErr++; 416 } 417 if( pParse->pVdbe ){ 418 sqliteVdbeDelete(pParse->pVdbe); 419 pParse->pVdbe = 0; 420 } 421 if( pParse->pNewTable ){ 422 sqliteDeleteTable(pParse->db, pParse->pNewTable); 423 pParse->pNewTable = 0; 424 } 425 sqliteParseInfoReset(pParse); 426 return nErr; 427 } 428