1 /* 2 ** 2001 September 15 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** An tokenizer for SQL 13 ** 14 ** This file contains C code that implements the sqlite3_complete() API. 15 ** This code used to be part of the tokenizer.c source file. But by 16 ** separating it out, the code will be automatically omitted from 17 ** static links that do not use it. 18 */ 19 #include "sqliteInt.h" 20 #ifndef SQLITE_OMIT_COMPLETE 21 22 /* 23 ** This is defined in tokenize.c. We just have to import the definition. 24 */ 25 #ifndef SQLITE_AMALGAMATION 26 #ifdef SQLITE_ASCII 27 extern const char sqlite3IsAsciiIdChar[]; 28 #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && sqlite3IsAsciiIdChar[c-0x20])) 29 #endif 30 #ifdef SQLITE_EBCDIC 31 extern const char sqlite3IsEbcdicIdChar[]; 32 #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) 33 #endif 34 #endif /* SQLITE_AMALGAMATION */ 35 36 37 /* 38 ** Token types used by the sqlite3_complete() routine. See the header 39 ** comments on that procedure for additional information. 40 */ 41 #define tkSEMI 0 42 #define tkWS 1 43 #define tkOTHER 2 44 #define tkEXPLAIN 3 45 #define tkCREATE 4 46 #define tkTEMP 5 47 #define tkTRIGGER 6 48 #define tkEND 7 49 50 /* 51 ** Return TRUE if the given SQL string ends in a semicolon. 52 ** 53 ** Special handling is require for CREATE TRIGGER statements. 54 ** Whenever the CREATE TRIGGER keywords are seen, the statement 55 ** must end with ";END;". 56 ** 57 ** This implementation uses a state machine with 7 states: 58 ** 59 ** (0) START At the beginning or end of an SQL statement. This routine 60 ** returns 1 if it ends in the START state and 0 if it ends 61 ** in any other state. 62 ** 63 ** (1) NORMAL We are in the middle of statement which ends with a single 64 ** semicolon. 65 ** 66 ** (2) EXPLAIN The keyword EXPLAIN has been seen at the beginning of 67 ** a statement. 68 ** 69 ** (3) CREATE The keyword CREATE has been seen at the beginning of a 70 ** statement, possibly preceeded by EXPLAIN and/or followed by 71 ** TEMP or TEMPORARY 72 ** 73 ** (4) TRIGGER We are in the middle of a trigger definition that must be 74 ** ended by a semicolon, the keyword END, and another semicolon. 75 ** 76 ** (5) SEMI We've seen the first semicolon in the ";END;" that occurs at 77 ** the end of a trigger definition. 78 ** 79 ** (6) END We've seen the ";END" of the ";END;" that occurs at the end 80 ** of a trigger difinition. 81 ** 82 ** Transitions between states above are determined by tokens extracted 83 ** from the input. The following tokens are significant: 84 ** 85 ** (0) tkSEMI A semicolon. 86 ** (1) tkWS Whitespace 87 ** (2) tkOTHER Any other SQL token. 88 ** (3) tkEXPLAIN The "explain" keyword. 89 ** (4) tkCREATE The "create" keyword. 90 ** (5) tkTEMP The "temp" or "temporary" keyword. 91 ** (6) tkTRIGGER The "trigger" keyword. 92 ** (7) tkEND The "end" keyword. 93 ** 94 ** Whitespace never causes a state transition and is always ignored. 95 ** 96 ** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed 97 ** to recognize the end of a trigger can be omitted. All we have to do 98 ** is look for a semicolon that is not part of an string or comment. 99 */ 100 int sqlite3_complete(const char *zSql){ 101 u8 state = 0; /* Current state, using numbers defined in header comment */ 102 u8 token; /* Value of the next token */ 103 104 #ifndef SQLITE_OMIT_TRIGGER 105 /* A complex statement machine used to detect the end of a CREATE TRIGGER 106 ** statement. This is the normal case. 107 */ 108 static const u8 trans[7][8] = { 109 /* Token: */ 110 /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */ 111 /* 0 START: */ { 0, 0, 1, 2, 3, 1, 1, 1, }, 112 /* 1 NORMAL: */ { 0, 1, 1, 1, 1, 1, 1, 1, }, 113 /* 2 EXPLAIN: */ { 0, 2, 2, 1, 3, 1, 1, 1, }, 114 /* 3 CREATE: */ { 0, 3, 1, 1, 1, 3, 4, 1, }, 115 /* 4 TRIGGER: */ { 5, 4, 4, 4, 4, 4, 4, 4, }, 116 /* 5 SEMI: */ { 5, 5, 4, 4, 4, 4, 4, 6, }, 117 /* 6 END: */ { 0, 6, 4, 4, 4, 4, 4, 4, }, 118 }; 119 #else 120 /* If triggers are not suppored by this compile then the statement machine 121 ** used to detect the end of a statement is much simplier 122 */ 123 static const u8 trans[2][3] = { 124 /* Token: */ 125 /* State: ** SEMI WS OTHER */ 126 /* 0 START: */ { 0, 0, 1, }, 127 /* 1 NORMAL: */ { 0, 1, 1, }, 128 }; 129 #endif /* SQLITE_OMIT_TRIGGER */ 130 131 while( *zSql ){ 132 switch( *zSql ){ 133 case ';': { /* A semicolon */ 134 token = tkSEMI; 135 break; 136 } 137 case ' ': 138 case '\r': 139 case '\t': 140 case '\n': 141 case '\f': { /* White space is ignored */ 142 token = tkWS; 143 break; 144 } 145 case '/': { /* C-style comments */ 146 if( zSql[1]!='*' ){ 147 token = tkOTHER; 148 break; 149 } 150 zSql += 2; 151 while( zSql[0] && (zSql[0]!='*' || zSql[1]!='/') ){ zSql++; } 152 if( zSql[0]==0 ) return 0; 153 zSql++; 154 token = tkWS; 155 break; 156 } 157 case '-': { /* SQL-style comments from "--" to end of line */ 158 if( zSql[1]!='-' ){ 159 token = tkOTHER; 160 break; 161 } 162 while( *zSql && *zSql!='\n' ){ zSql++; } 163 if( *zSql==0 ) return state==0; 164 token = tkWS; 165 break; 166 } 167 case '[': { /* Microsoft-style identifiers in [...] */ 168 zSql++; 169 while( *zSql && *zSql!=']' ){ zSql++; } 170 if( *zSql==0 ) return 0; 171 token = tkOTHER; 172 break; 173 } 174 case '`': /* Grave-accent quoted symbols used by MySQL */ 175 case '"': /* single- and double-quoted strings */ 176 case '\'': { 177 int c = *zSql; 178 zSql++; 179 while( *zSql && *zSql!=c ){ zSql++; } 180 if( *zSql==0 ) return 0; 181 token = tkOTHER; 182 break; 183 } 184 default: { 185 int c; 186 if( IdChar((u8)*zSql) ){ 187 /* Keywords and unquoted identifiers */ 188 int nId; 189 for(nId=1; IdChar(zSql[nId]); nId++){} 190 #ifdef SQLITE_OMIT_TRIGGER 191 token = tkOTHER; 192 #else 193 switch( *zSql ){ 194 case 'c': case 'C': { 195 if( nId==6 && sqlite3StrNICmp(zSql, "create", 6)==0 ){ 196 token = tkCREATE; 197 }else{ 198 token = tkOTHER; 199 } 200 break; 201 } 202 case 't': case 'T': { 203 if( nId==7 && sqlite3StrNICmp(zSql, "trigger", 7)==0 ){ 204 token = tkTRIGGER; 205 }else if( nId==4 && sqlite3StrNICmp(zSql, "temp", 4)==0 ){ 206 token = tkTEMP; 207 }else if( nId==9 && sqlite3StrNICmp(zSql, "temporary", 9)==0 ){ 208 token = tkTEMP; 209 }else{ 210 token = tkOTHER; 211 } 212 break; 213 } 214 case 'e': case 'E': { 215 if( nId==3 && sqlite3StrNICmp(zSql, "end", 3)==0 ){ 216 token = tkEND; 217 }else 218 #ifndef SQLITE_OMIT_EXPLAIN 219 if( nId==7 && sqlite3StrNICmp(zSql, "explain", 7)==0 ){ 220 token = tkEXPLAIN; 221 }else 222 #endif 223 { 224 token = tkOTHER; 225 } 226 break; 227 } 228 default: { 229 token = tkOTHER; 230 break; 231 } 232 } 233 #endif /* SQLITE_OMIT_TRIGGER */ 234 zSql += nId-1; 235 }else{ 236 /* Operators and special symbols */ 237 token = tkOTHER; 238 } 239 break; 240 } 241 } 242 state = trans[state][token]; 243 zSql++; 244 } 245 return state==0; 246 } 247 248 #ifndef SQLITE_OMIT_UTF16 249 /* 250 ** This routine is the same as the sqlite3_complete() routine described 251 ** above, except that the parameter is required to be UTF-16 encoded, not 252 ** UTF-8. 253 */ 254 int sqlite3_complete16(const void *zSql){ 255 sqlite3_value *pVal; 256 char const *zSql8; 257 int rc = SQLITE_NOMEM; 258 259 #ifndef SQLITE_OMIT_AUTOINIT 260 rc = sqlite3_initialize(); 261 if( rc ) return rc; 262 #endif 263 pVal = sqlite3ValueNew(0); 264 sqlite3ValueSetStr(pVal, -1, zSql, SQLITE_UTF16NATIVE, SQLITE_STATIC); 265 zSql8 = sqlite3ValueText(pVal, SQLITE_UTF8); 266 if( zSql8 ){ 267 rc = sqlite3_complete(zSql8); 268 }else{ 269 rc = SQLITE_NOMEM; 270 } 271 sqlite3ValueFree(pVal); 272 return sqlite3ApiExit(0, rc); 273 } 274 #endif /* SQLITE_OMIT_UTF16 */ 275 #endif /* SQLITE_OMIT_COMPLETE */ 276