1 /* 2 ** 2016-05-28 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the implementation of an SQLite virtual table for 14 ** reading CSV files. 15 ** 16 ** Usage: 17 ** 18 ** .load ./csv 19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); 20 ** SELECT * FROM csv; 21 ** 22 ** The columns are named "c1", "c2", "c3", ... by default. Or the 23 ** application can define its own CREATE TABLE statement using the 24 ** schema= parameter, like this: 25 ** 26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv( 27 ** filename = "../http.log", 28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" 29 ** ); 30 ** 31 ** Instead of specifying a file, the text of the CSV can be loaded using 32 ** the data= parameter. 33 ** 34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have 35 ** N columns. If both the columns= and schema= parameters are omitted, then 36 ** the number and names of the columns is determined by the first line of 37 ** the CSV input. 38 ** 39 ** Some extra debugging features (used for testing virtual tables) are available 40 ** if this module is compiled with -DSQLITE_TEST. 41 */ 42 #include <sqlite3ext.h> 43 SQLITE_EXTENSION_INIT1 44 #include <string.h> 45 #include <stdlib.h> 46 #include <assert.h> 47 #include <stdarg.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 #ifndef SQLITE_OMIT_VIRTUALTABLE 52 53 /* 54 ** A macro to hint to the compiler that a function should not be 55 ** inlined. 56 */ 57 #if defined(__GNUC__) 58 # define CSV_NOINLINE __attribute__((noinline)) 59 #elif defined(_MSC_VER) && _MSC_VER>=1310 60 # define CSV_NOINLINE __declspec(noinline) 61 #else 62 # define CSV_NOINLINE 63 #endif 64 65 66 /* Max size of the error message in a CsvReader */ 67 #define CSV_MXERR 200 68 69 /* Size of the CsvReader input buffer */ 70 #define CSV_INBUFSZ 1024 71 72 /* A context object used when read a CSV file. */ 73 typedef struct CsvReader CsvReader; 74 struct CsvReader { 75 FILE *in; /* Read the CSV text from this input stream */ 76 char *z; /* Accumulated text for a field */ 77 int n; /* Number of bytes in z */ 78 int nAlloc; /* Space allocated for z[] */ 79 int nLine; /* Current line number */ 80 int bNotFirst; /* True if prior text has been seen */ 81 int cTerm; /* Character that terminated the most recent field */ 82 size_t iIn; /* Next unread character in the input buffer */ 83 size_t nIn; /* Number of characters in the input buffer */ 84 char *zIn; /* The input buffer */ 85 char zErr[CSV_MXERR]; /* Error message */ 86 }; 87 88 /* Initialize a CsvReader object */ 89 static void csv_reader_init(CsvReader *p){ 90 p->in = 0; 91 p->z = 0; 92 p->n = 0; 93 p->nAlloc = 0; 94 p->nLine = 0; 95 p->bNotFirst = 0; 96 p->nIn = 0; 97 p->zIn = 0; 98 p->zErr[0] = 0; 99 } 100 101 /* Close and reset a CsvReader object */ 102 static void csv_reader_reset(CsvReader *p){ 103 if( p->in ){ 104 fclose(p->in); 105 sqlite3_free(p->zIn); 106 } 107 sqlite3_free(p->z); 108 csv_reader_init(p); 109 } 110 111 /* Report an error on a CsvReader */ 112 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ 113 va_list ap; 114 va_start(ap, zFormat); 115 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); 116 va_end(ap); 117 } 118 119 /* Open the file associated with a CsvReader 120 ** Return the number of errors. 121 */ 122 static int csv_reader_open( 123 CsvReader *p, /* The reader to open */ 124 const char *zFilename, /* Read from this filename */ 125 const char *zData /* ... or use this data */ 126 ){ 127 if( zFilename ){ 128 p->zIn = sqlite3_malloc( CSV_INBUFSZ ); 129 if( p->zIn==0 ){ 130 csv_errmsg(p, "out of memory"); 131 return 1; 132 } 133 p->in = fopen(zFilename, "rb"); 134 if( p->in==0 ){ 135 sqlite3_free(p->zIn); 136 csv_reader_reset(p); 137 csv_errmsg(p, "cannot open '%s' for reading", zFilename); 138 return 1; 139 } 140 }else{ 141 assert( p->in==0 ); 142 p->zIn = (char*)zData; 143 p->nIn = strlen(zData); 144 } 145 return 0; 146 } 147 148 /* The input buffer has overflowed. Refill the input buffer, then 149 ** return the next character 150 */ 151 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ 152 size_t got; 153 154 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ 155 assert( p->in!=0 ); /* Only called if reading froma file */ 156 157 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); 158 if( got==0 ) return EOF; 159 p->nIn = got; 160 p->iIn = 1; 161 return p->zIn[0]; 162 } 163 164 /* Return the next character of input. Return EOF at end of input. */ 165 static int csv_getc(CsvReader *p){ 166 if( p->iIn >= p->nIn ){ 167 if( p->in!=0 ) return csv_getc_refill(p); 168 return EOF; 169 } 170 return ((unsigned char*)p->zIn)[p->iIn++]; 171 } 172 173 /* Increase the size of p->z and append character c to the end. 174 ** Return 0 on success and non-zero if there is an OOM error */ 175 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ 176 char *zNew; 177 int nNew = p->nAlloc*2 + 100; 178 zNew = sqlite3_realloc64(p->z, nNew); 179 if( zNew ){ 180 p->z = zNew; 181 p->nAlloc = nNew; 182 p->z[p->n++] = c; 183 return 0; 184 }else{ 185 csv_errmsg(p, "out of memory"); 186 return 1; 187 } 188 } 189 190 /* Append a single character to the CsvReader.z[] array. 191 ** Return 0 on success and non-zero if there is an OOM error */ 192 static int csv_append(CsvReader *p, char c){ 193 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); 194 p->z[p->n++] = c; 195 return 0; 196 } 197 198 /* Read a single field of CSV text. Compatible with rfc4180 and extended 199 ** with the option of having a separator other than ",". 200 ** 201 ** + Input comes from p->in. 202 ** + Store results in p->z of length p->n. Space to hold p->z comes 203 ** from sqlite3_malloc64(). 204 ** + Keep track of the line number in p->nLine. 205 ** + Store the character that terminates the field in p->cTerm. Store 206 ** EOF on end-of-file. 207 ** 208 ** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have 209 ** been set to EOF. 210 */ 211 static char *csv_read_one_field(CsvReader *p){ 212 int c; 213 p->n = 0; 214 c = csv_getc(p); 215 if( c==EOF ){ 216 p->cTerm = EOF; 217 return 0; 218 } 219 if( c=='"' ){ 220 int pc, ppc; 221 int startLine = p->nLine; 222 pc = ppc = 0; 223 while( 1 ){ 224 c = csv_getc(p); 225 if( c<='"' || pc=='"' ){ 226 if( c=='\n' ) p->nLine++; 227 if( c=='"' ){ 228 if( pc=='"' ){ 229 pc = 0; 230 continue; 231 } 232 } 233 if( (c==',' && pc=='"') 234 || (c=='\n' && pc=='"') 235 || (c=='\n' && pc=='\r' && ppc=='"') 236 || (c==EOF && pc=='"') 237 ){ 238 do{ p->n--; }while( p->z[p->n]!='"' ); 239 p->cTerm = (char)c; 240 break; 241 } 242 if( pc=='"' && c!='\r' ){ 243 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); 244 break; 245 } 246 if( c==EOF ){ 247 csv_errmsg(p, "line %d: unterminated %c-quoted field\n", 248 startLine, '"'); 249 p->cTerm = (char)c; 250 break; 251 } 252 } 253 if( csv_append(p, (char)c) ) return 0; 254 ppc = pc; 255 pc = c; 256 } 257 }else{ 258 /* If this is the first field being parsed and it begins with the 259 ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ 260 if( (c&0xff)==0xef && p->bNotFirst==0 ){ 261 csv_append(p, (char)c); 262 c = csv_getc(p); 263 if( (c&0xff)==0xbb ){ 264 csv_append(p, (char)c); 265 c = csv_getc(p); 266 if( (c&0xff)==0xbf ){ 267 p->bNotFirst = 1; 268 p->n = 0; 269 return csv_read_one_field(p); 270 } 271 } 272 } 273 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ 274 if( csv_append(p, (char)c) ) return 0; 275 c = csv_getc(p); 276 } 277 if( c=='\n' ){ 278 p->nLine++; 279 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; 280 } 281 p->cTerm = (char)c; 282 } 283 assert( p->z==0 || p->n<p->nAlloc ); 284 if( p->z ) p->z[p->n] = 0; 285 p->bNotFirst = 1; 286 return p->z; 287 } 288 289 290 /* Forward references to the various virtual table methods implemented 291 ** in this file. */ 292 static int csvtabCreate(sqlite3*, void*, int, const char*const*, 293 sqlite3_vtab**,char**); 294 static int csvtabConnect(sqlite3*, void*, int, const char*const*, 295 sqlite3_vtab**,char**); 296 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); 297 static int csvtabDisconnect(sqlite3_vtab*); 298 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); 299 static int csvtabClose(sqlite3_vtab_cursor*); 300 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, 301 int argc, sqlite3_value **argv); 302 static int csvtabNext(sqlite3_vtab_cursor*); 303 static int csvtabEof(sqlite3_vtab_cursor*); 304 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); 305 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); 306 307 /* An instance of the CSV virtual table */ 308 typedef struct CsvTable { 309 sqlite3_vtab base; /* Base class. Must be first */ 310 char *zFilename; /* Name of the CSV file */ 311 char *zData; /* Raw CSV data in lieu of zFilename */ 312 long iStart; /* Offset to start of data in zFilename */ 313 int nCol; /* Number of columns in the CSV file */ 314 unsigned int tstFlags; /* Bit values used for testing */ 315 } CsvTable; 316 317 /* Allowed values for tstFlags */ 318 #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ 319 320 /* A cursor for the CSV virtual table */ 321 typedef struct CsvCursor { 322 sqlite3_vtab_cursor base; /* Base class. Must be first */ 323 CsvReader rdr; /* The CsvReader object */ 324 char **azVal; /* Value of the current row */ 325 int *aLen; /* Length of each entry */ 326 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ 327 } CsvCursor; 328 329 /* Transfer error message text from a reader into a CsvTable */ 330 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ 331 sqlite3_free(pTab->base.zErrMsg); 332 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); 333 } 334 335 /* 336 ** This method is the destructor fo a CsvTable object. 337 */ 338 static int csvtabDisconnect(sqlite3_vtab *pVtab){ 339 CsvTable *p = (CsvTable*)pVtab; 340 sqlite3_free(p->zFilename); 341 sqlite3_free(p->zData); 342 sqlite3_free(p); 343 return SQLITE_OK; 344 } 345 346 /* Skip leading whitespace. Return a pointer to the first non-whitespace 347 ** character, or to the zero terminator if the string has only whitespace */ 348 static const char *csv_skip_whitespace(const char *z){ 349 while( isspace((unsigned char)z[0]) ) z++; 350 return z; 351 } 352 353 /* Remove trailing whitespace from the end of string z[] */ 354 static void csv_trim_whitespace(char *z){ 355 size_t n = strlen(z); 356 while( n>0 && isspace((unsigned char)z[n]) ) n--; 357 z[n] = 0; 358 } 359 360 /* Dequote the string */ 361 static void csv_dequote(char *z){ 362 int j; 363 char cQuote = z[0]; 364 size_t i, n; 365 366 if( cQuote!='\'' && cQuote!='"' ) return; 367 n = strlen(z); 368 if( n<2 || z[n-1]!=z[0] ) return; 369 for(i=1, j=0; i<n-1; i++){ 370 if( z[i]==cQuote && z[i+1]==cQuote ) i++; 371 z[j++] = z[i]; 372 } 373 z[j] = 0; 374 } 375 376 /* Check to see if the string is of the form: "TAG = VALUE" with optional 377 ** whitespace before and around tokens. If it is, return a pointer to the 378 ** first character of VALUE. If it is not, return NULL. 379 */ 380 static const char *csv_parameter(const char *zTag, int nTag, const char *z){ 381 z = csv_skip_whitespace(z); 382 if( strncmp(zTag, z, nTag)!=0 ) return 0; 383 z = csv_skip_whitespace(z+nTag); 384 if( z[0]!='=' ) return 0; 385 return csv_skip_whitespace(z+1); 386 } 387 388 /* Decode a parameter that requires a dequoted string. 389 ** 390 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned 391 ** even if there is an error. If an error occurs, then an error message 392 ** is left in p->zErr. If there are no errors, p->zErr[0]==0. 393 */ 394 static int csv_string_parameter( 395 CsvReader *p, /* Leave the error message here, if there is one */ 396 const char *zParam, /* Parameter we are checking for */ 397 const char *zArg, /* Raw text of the virtual table argment */ 398 char **pzVal /* Write the dequoted string value here */ 399 ){ 400 const char *zValue; 401 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg); 402 if( zValue==0 ) return 0; 403 p->zErr[0] = 0; 404 if( *pzVal ){ 405 csv_errmsg(p, "more than one '%s' parameter", zParam); 406 return 1; 407 } 408 *pzVal = sqlite3_mprintf("%s", zValue); 409 if( *pzVal==0 ){ 410 csv_errmsg(p, "out of memory"); 411 return 1; 412 } 413 csv_trim_whitespace(*pzVal); 414 csv_dequote(*pzVal); 415 return 1; 416 } 417 418 419 /* Return 0 if the argument is false and 1 if it is true. Return -1 if 420 ** we cannot really tell. 421 */ 422 static int csv_boolean(const char *z){ 423 if( sqlite3_stricmp("yes",z)==0 424 || sqlite3_stricmp("on",z)==0 425 || sqlite3_stricmp("true",z)==0 426 || (z[0]=='1' && z[1]==0) 427 ){ 428 return 1; 429 } 430 if( sqlite3_stricmp("no",z)==0 431 || sqlite3_stricmp("off",z)==0 432 || sqlite3_stricmp("false",z)==0 433 || (z[0]=='0' && z[1]==0) 434 ){ 435 return 0; 436 } 437 return -1; 438 } 439 440 /* Check to see if the string is of the form: "TAG = BOOLEAN" or just "TAG". 441 ** If it is, set *pValue to be the value of the boolean ("true" if there is 442 ** not "= BOOLEAN" component) and return non-zero. If the input string 443 ** does not begin with TAG, return zero. 444 */ 445 static int csv_boolean_parameter( 446 const char *zTag, /* Tag we are looking for */ 447 int nTag, /* Size of the tag in bytes */ 448 const char *z, /* Input parameter */ 449 int *pValue /* Write boolean value here */ 450 ){ 451 int b; 452 z = csv_skip_whitespace(z); 453 if( strncmp(zTag, z, nTag)!=0 ) return 0; 454 z = csv_skip_whitespace(z + nTag); 455 if( z[0]==0 ){ 456 *pValue = 1; 457 return 1; 458 } 459 if( z[0]!='=' ) return 0; 460 z = csv_skip_whitespace(z+1); 461 b = csv_boolean(z); 462 if( b>=0 ){ 463 *pValue = b; 464 return 1; 465 } 466 return 0; 467 } 468 469 /* 470 ** Parameters: 471 ** filename=FILENAME Name of file containing CSV content 472 ** data=TEXT Direct CSV content. 473 ** schema=SCHEMA Alternative CSV schema. 474 ** header=YES|NO First row of CSV defines the names of 475 ** columns if "yes". Default "no". 476 ** columns=N Assume the CSV file contains N columns. 477 ** 478 ** Only available if compiled with SQLITE_TEST: 479 ** 480 ** testflags=N Bitmask of test flags. Optional 481 ** 482 ** If schema= is omitted, then the columns are named "c0", "c1", "c2", 483 ** and so forth. If columns=N is omitted, then the file is opened and 484 ** the number of columns in the first row is counted to determine the 485 ** column count. If header=YES, then the first row is skipped. 486 */ 487 static int csvtabConnect( 488 sqlite3 *db, 489 void *pAux, 490 int argc, const char *const*argv, 491 sqlite3_vtab **ppVtab, 492 char **pzErr 493 ){ 494 CsvTable *pNew = 0; /* The CsvTable object to construct */ 495 int bHeader = -1; /* header= flags. -1 means not seen yet */ 496 int rc = SQLITE_OK; /* Result code from this routine */ 497 int i, j; /* Loop counters */ 498 #ifdef SQLITE_TEST 499 int tstFlags = 0; /* Value for testflags=N parameter */ 500 #endif 501 int b; /* Value of a boolean parameter */ 502 int nCol = -99; /* Value of the columns= parameter */ 503 CsvReader sRdr; /* A CSV file reader used to store an error 504 ** message and/or to count the number of columns */ 505 static const char *azParam[] = { 506 "filename", "data", "schema", 507 }; 508 char *azPValue[3]; /* Parameter values */ 509 # define CSV_FILENAME (azPValue[0]) 510 # define CSV_DATA (azPValue[1]) 511 # define CSV_SCHEMA (azPValue[2]) 512 513 514 assert( sizeof(azPValue)==sizeof(azParam) ); 515 memset(&sRdr, 0, sizeof(sRdr)); 516 memset(azPValue, 0, sizeof(azPValue)); 517 for(i=3; i<argc; i++){ 518 const char *z = argv[i]; 519 const char *zValue; 520 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ 521 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; 522 } 523 if( j<sizeof(azParam)/sizeof(azParam[0]) ){ 524 if( sRdr.zErr[0] ) goto csvtab_connect_error; 525 }else 526 if( csv_boolean_parameter("header",6,z,&b) ){ 527 if( bHeader>=0 ){ 528 csv_errmsg(&sRdr, "more than one 'header' parameter"); 529 goto csvtab_connect_error; 530 } 531 bHeader = b; 532 }else 533 #ifdef SQLITE_TEST 534 if( (zValue = csv_parameter("testflags",9,z))!=0 ){ 535 tstFlags = (unsigned int)atoi(zValue); 536 }else 537 #endif 538 if( (zValue = csv_parameter("columns",7,z))!=0 ){ 539 if( nCol>0 ){ 540 csv_errmsg(&sRdr, "more than one 'columns' parameter"); 541 goto csvtab_connect_error; 542 } 543 nCol = atoi(zValue); 544 if( nCol<=0 ){ 545 csv_errmsg(&sRdr, "column= value must be positive"); 546 goto csvtab_connect_error; 547 } 548 }else 549 { 550 csv_errmsg(&sRdr, "bad parameter: '%s'", z); 551 goto csvtab_connect_error; 552 } 553 } 554 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ 555 csv_errmsg(&sRdr, "must specify either filename= or data= but not both"); 556 goto csvtab_connect_error; 557 } 558 559 if( (nCol<=0 || bHeader==1) 560 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) 561 ){ 562 goto csvtab_connect_error; 563 } 564 pNew = sqlite3_malloc( sizeof(*pNew) ); 565 *ppVtab = (sqlite3_vtab*)pNew; 566 if( pNew==0 ) goto csvtab_connect_oom; 567 memset(pNew, 0, sizeof(*pNew)); 568 if( CSV_SCHEMA==0 ){ 569 sqlite3_str *pStr = sqlite3_str_new(0); 570 char *zSep = ""; 571 int iCol = 0; 572 sqlite3_str_appendf(pStr, "CREATE TABLE x("); 573 if( nCol<0 && bHeader<1 ){ 574 nCol = 0; 575 do{ 576 csv_read_one_field(&sRdr); 577 nCol++; 578 }while( sRdr.cTerm==',' ); 579 } 580 if( nCol>0 && bHeader<1 ){ 581 for(iCol=0; iCol<nCol; iCol++){ 582 sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol); 583 zSep = ","; 584 } 585 }else{ 586 do{ 587 char *z = csv_read_one_field(&sRdr); 588 if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){ 589 sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z); 590 zSep = ","; 591 iCol++; 592 } 593 }while( sRdr.cTerm==',' ); 594 if( nCol<0 ){ 595 nCol = iCol; 596 }else{ 597 while( iCol<nCol ){ 598 sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol); 599 zSep = ","; 600 } 601 } 602 } 603 pNew->nCol = nCol; 604 sqlite3_str_appendf(pStr, ")"); 605 CSV_SCHEMA = sqlite3_str_finish(pStr); 606 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; 607 }else if( nCol<0 ){ 608 do{ 609 csv_read_one_field(&sRdr); 610 pNew->nCol++; 611 }while( sRdr.cTerm==',' ); 612 }else{ 613 pNew->nCol = nCol; 614 } 615 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; 616 pNew->zData = CSV_DATA; CSV_DATA = 0; 617 #ifdef SQLITE_TEST 618 pNew->tstFlags = tstFlags; 619 #endif 620 if( bHeader!=1 ){ 621 pNew->iStart = 0; 622 }else if( pNew->zData ){ 623 pNew->iStart = (int)sRdr.iIn; 624 }else{ 625 pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn); 626 } 627 csv_reader_reset(&sRdr); 628 rc = sqlite3_declare_vtab(db, CSV_SCHEMA); 629 if( rc ){ 630 csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db)); 631 goto csvtab_connect_error; 632 } 633 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 634 sqlite3_free(azPValue[i]); 635 } 636 /* Rationale for DIRECTONLY: 637 ** An attacker who controls a database schema could use this vtab 638 ** to exfiltrate sensitive data from other files in the filesystem. 639 ** And, recommended practice is to put all CSV virtual tables in the 640 ** TEMP namespace, so they should still be usable from within TEMP 641 ** views, so there shouldn't be a serious loss of functionality by 642 ** prohibiting the use of this vtab from persistent triggers and views. 643 */ 644 sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY); 645 return SQLITE_OK; 646 647 csvtab_connect_oom: 648 rc = SQLITE_NOMEM; 649 csv_errmsg(&sRdr, "out of memory"); 650 651 csvtab_connect_error: 652 if( pNew ) csvtabDisconnect(&pNew->base); 653 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 654 sqlite3_free(azPValue[i]); 655 } 656 if( sRdr.zErr[0] ){ 657 sqlite3_free(*pzErr); 658 *pzErr = sqlite3_mprintf("%s", sRdr.zErr); 659 } 660 csv_reader_reset(&sRdr); 661 if( rc==SQLITE_OK ) rc = SQLITE_ERROR; 662 return rc; 663 } 664 665 /* 666 ** Reset the current row content held by a CsvCursor. 667 */ 668 static void csvtabCursorRowReset(CsvCursor *pCur){ 669 CsvTable *pTab = (CsvTable*)pCur->base.pVtab; 670 int i; 671 for(i=0; i<pTab->nCol; i++){ 672 sqlite3_free(pCur->azVal[i]); 673 pCur->azVal[i] = 0; 674 pCur->aLen[i] = 0; 675 } 676 } 677 678 /* 679 ** The xConnect and xCreate methods do the same thing, but they must be 680 ** different so that the virtual table is not an eponymous virtual table. 681 */ 682 static int csvtabCreate( 683 sqlite3 *db, 684 void *pAux, 685 int argc, const char *const*argv, 686 sqlite3_vtab **ppVtab, 687 char **pzErr 688 ){ 689 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); 690 } 691 692 /* 693 ** Destructor for a CsvCursor. 694 */ 695 static int csvtabClose(sqlite3_vtab_cursor *cur){ 696 CsvCursor *pCur = (CsvCursor*)cur; 697 csvtabCursorRowReset(pCur); 698 csv_reader_reset(&pCur->rdr); 699 sqlite3_free(cur); 700 return SQLITE_OK; 701 } 702 703 /* 704 ** Constructor for a new CsvTable cursor object. 705 */ 706 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ 707 CsvTable *pTab = (CsvTable*)p; 708 CsvCursor *pCur; 709 size_t nByte; 710 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; 711 pCur = sqlite3_malloc64( nByte ); 712 if( pCur==0 ) return SQLITE_NOMEM; 713 memset(pCur, 0, nByte); 714 pCur->azVal = (char**)&pCur[1]; 715 pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; 716 *ppCursor = &pCur->base; 717 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ 718 csv_xfer_error(pTab, &pCur->rdr); 719 return SQLITE_ERROR; 720 } 721 return SQLITE_OK; 722 } 723 724 725 /* 726 ** Advance a CsvCursor to its next row of input. 727 ** Set the EOF marker if we reach the end of input. 728 */ 729 static int csvtabNext(sqlite3_vtab_cursor *cur){ 730 CsvCursor *pCur = (CsvCursor*)cur; 731 CsvTable *pTab = (CsvTable*)cur->pVtab; 732 int i = 0; 733 char *z; 734 do{ 735 z = csv_read_one_field(&pCur->rdr); 736 if( z==0 ){ 737 break; 738 } 739 if( i<pTab->nCol ){ 740 if( pCur->aLen[i] < pCur->rdr.n+1 ){ 741 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1); 742 if( zNew==0 ){ 743 csv_errmsg(&pCur->rdr, "out of memory"); 744 csv_xfer_error(pTab, &pCur->rdr); 745 break; 746 } 747 pCur->azVal[i] = zNew; 748 pCur->aLen[i] = pCur->rdr.n+1; 749 } 750 memcpy(pCur->azVal[i], z, pCur->rdr.n+1); 751 i++; 752 } 753 }while( pCur->rdr.cTerm==',' ); 754 if( z==0 && i==0 ){ 755 pCur->iRowid = -1; 756 }else{ 757 pCur->iRowid++; 758 while( i<pTab->nCol ){ 759 sqlite3_free(pCur->azVal[i]); 760 pCur->azVal[i] = 0; 761 pCur->aLen[i] = 0; 762 i++; 763 } 764 } 765 return SQLITE_OK; 766 } 767 768 /* 769 ** Return values of columns for the row at which the CsvCursor 770 ** is currently pointing. 771 */ 772 static int csvtabColumn( 773 sqlite3_vtab_cursor *cur, /* The cursor */ 774 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ 775 int i /* Which column to return */ 776 ){ 777 CsvCursor *pCur = (CsvCursor*)cur; 778 CsvTable *pTab = (CsvTable*)cur->pVtab; 779 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ 780 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT); 781 } 782 return SQLITE_OK; 783 } 784 785 /* 786 ** Return the rowid for the current row. 787 */ 788 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ 789 CsvCursor *pCur = (CsvCursor*)cur; 790 *pRowid = pCur->iRowid; 791 return SQLITE_OK; 792 } 793 794 /* 795 ** Return TRUE if the cursor has been moved off of the last 796 ** row of output. 797 */ 798 static int csvtabEof(sqlite3_vtab_cursor *cur){ 799 CsvCursor *pCur = (CsvCursor*)cur; 800 return pCur->iRowid<0; 801 } 802 803 /* 804 ** Only a full table scan is supported. So xFilter simply rewinds to 805 ** the beginning. 806 */ 807 static int csvtabFilter( 808 sqlite3_vtab_cursor *pVtabCursor, 809 int idxNum, const char *idxStr, 810 int argc, sqlite3_value **argv 811 ){ 812 CsvCursor *pCur = (CsvCursor*)pVtabCursor; 813 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; 814 pCur->iRowid = 0; 815 816 /* Ensure the field buffer is always allocated. Otherwise, if the 817 ** first field is zero bytes in size, this may be mistaken for an OOM 818 ** error in csvtabNext(). */ 819 if( csv_append(&pCur->rdr, 0) ) return SQLITE_NOMEM; 820 821 if( pCur->rdr.in==0 ){ 822 assert( pCur->rdr.zIn==pTab->zData ); 823 assert( pTab->iStart>=0 ); 824 assert( (size_t)pTab->iStart<=pCur->rdr.nIn ); 825 pCur->rdr.iIn = pTab->iStart; 826 }else{ 827 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); 828 pCur->rdr.iIn = 0; 829 pCur->rdr.nIn = 0; 830 } 831 return csvtabNext(pVtabCursor); 832 } 833 834 /* 835 ** Only a forward full table scan is supported. xBestIndex is mostly 836 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality 837 ** constraints lowers the estimated cost, which is fiction, but is useful 838 ** for testing certain kinds of virtual table behavior. 839 */ 840 static int csvtabBestIndex( 841 sqlite3_vtab *tab, 842 sqlite3_index_info *pIdxInfo 843 ){ 844 pIdxInfo->estimatedCost = 1000000; 845 #ifdef SQLITE_TEST 846 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ 847 /* The usual (and sensible) case is to always do a full table scan. 848 ** The code in this branch only runs when testflags=1. This code 849 ** generates an artifical and unrealistic plan which is useful 850 ** for testing virtual table logic but is not helpful to real applications. 851 ** 852 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual 853 ** table (even though it is not) and the cost of running the virtual table 854 ** is reduced from 1 million to just 10. The constraints are *not* marked 855 ** as omittable, however, so the query planner should still generate a 856 ** plan that gives a correct answer, even if they plan is not optimal. 857 */ 858 int i; 859 int nConst = 0; 860 for(i=0; i<pIdxInfo->nConstraint; i++){ 861 unsigned char op; 862 if( pIdxInfo->aConstraint[i].usable==0 ) continue; 863 op = pIdxInfo->aConstraint[i].op; 864 if( op==SQLITE_INDEX_CONSTRAINT_EQ 865 || op==SQLITE_INDEX_CONSTRAINT_LIKE 866 || op==SQLITE_INDEX_CONSTRAINT_GLOB 867 ){ 868 pIdxInfo->estimatedCost = 10; 869 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; 870 nConst++; 871 } 872 } 873 } 874 #endif 875 return SQLITE_OK; 876 } 877 878 879 static sqlite3_module CsvModule = { 880 0, /* iVersion */ 881 csvtabCreate, /* xCreate */ 882 csvtabConnect, /* xConnect */ 883 csvtabBestIndex, /* xBestIndex */ 884 csvtabDisconnect, /* xDisconnect */ 885 csvtabDisconnect, /* xDestroy */ 886 csvtabOpen, /* xOpen - open a cursor */ 887 csvtabClose, /* xClose - close a cursor */ 888 csvtabFilter, /* xFilter - configure scan constraints */ 889 csvtabNext, /* xNext - advance a cursor */ 890 csvtabEof, /* xEof - check for end of scan */ 891 csvtabColumn, /* xColumn - read data */ 892 csvtabRowid, /* xRowid - read data */ 893 0, /* xUpdate */ 894 0, /* xBegin */ 895 0, /* xSync */ 896 0, /* xCommit */ 897 0, /* xRollback */ 898 0, /* xFindMethod */ 899 0, /* xRename */ 900 }; 901 902 #ifdef SQLITE_TEST 903 /* 904 ** For virtual table testing, make a version of the CSV virtual table 905 ** available that has an xUpdate function. But the xUpdate always returns 906 ** SQLITE_READONLY since the CSV file is not really writable. 907 */ 908 static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){ 909 return SQLITE_READONLY; 910 } 911 static sqlite3_module CsvModuleFauxWrite = { 912 0, /* iVersion */ 913 csvtabCreate, /* xCreate */ 914 csvtabConnect, /* xConnect */ 915 csvtabBestIndex, /* xBestIndex */ 916 csvtabDisconnect, /* xDisconnect */ 917 csvtabDisconnect, /* xDestroy */ 918 csvtabOpen, /* xOpen - open a cursor */ 919 csvtabClose, /* xClose - close a cursor */ 920 csvtabFilter, /* xFilter - configure scan constraints */ 921 csvtabNext, /* xNext - advance a cursor */ 922 csvtabEof, /* xEof - check for end of scan */ 923 csvtabColumn, /* xColumn - read data */ 924 csvtabRowid, /* xRowid - read data */ 925 csvtabUpdate, /* xUpdate */ 926 0, /* xBegin */ 927 0, /* xSync */ 928 0, /* xCommit */ 929 0, /* xRollback */ 930 0, /* xFindMethod */ 931 0, /* xRename */ 932 }; 933 #endif /* SQLITE_TEST */ 934 935 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ 936 937 938 #ifdef _WIN32 939 __declspec(dllexport) 940 #endif 941 /* 942 ** This routine is called when the extension is loaded. The new 943 ** CSV virtual table module is registered with the calling database 944 ** connection. 945 */ 946 int sqlite3_csv_init( 947 sqlite3 *db, 948 char **pzErrMsg, 949 const sqlite3_api_routines *pApi 950 ){ 951 #ifndef SQLITE_OMIT_VIRTUALTABLE 952 int rc; 953 SQLITE_EXTENSION_INIT2(pApi); 954 rc = sqlite3_create_module(db, "csv", &CsvModule, 0); 955 #ifdef SQLITE_TEST 956 if( rc==SQLITE_OK ){ 957 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); 958 } 959 #endif 960 return rc; 961 #else 962 return SQLITE_OK; 963 #endif 964 } 965