1 /* 2 ** 2016-05-28 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the implementation of an SQLite virtual table for 14 ** reading CSV files. 15 ** 16 ** Usage: 17 ** 18 ** .load ./csv 19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); 20 ** SELECT * FROM csv; 21 ** 22 ** The columns are named "c1", "c2", "c3", ... by default. But the 23 ** application can define its own CREATE TABLE statement as an additional 24 ** parameter. For example: 25 ** 26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv( 27 ** filename = "../http.log", 28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" 29 ** ); 30 ** 31 ** Instead of specifying a file, the text of the CSV can be loaded using 32 ** the data= parameter. 33 ** 34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have 35 ** N columns. If the columns parameter is omitted, the CSV file is opened 36 ** as soon as the virtual table is constructed and the first row of the CSV 37 ** is read in order to count the tables. 38 ** 39 ** Some extra debugging features (used for testing virtual tables) are available 40 ** if this module is compiled with -DSQLITE_TEST. 41 */ 42 #include <sqlite3ext.h> 43 SQLITE_EXTENSION_INIT1 44 #include <string.h> 45 #include <stdlib.h> 46 #include <assert.h> 47 #include <stdarg.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 #ifndef SQLITE_OMIT_VIRTUALTABLE 52 53 /* 54 ** A macro to hint to the compiler that a function should not be 55 ** inlined. 56 */ 57 #if defined(__GNUC__) 58 # define CSV_NOINLINE __attribute__((noinline)) 59 #elif defined(_MSC_VER) && _MSC_VER>=1310 60 # define CSV_NOINLINE __declspec(noinline) 61 #else 62 # define CSV_NOINLINE 63 #endif 64 65 66 /* Max size of the error message in a CsvReader */ 67 #define CSV_MXERR 200 68 69 /* Size of the CsvReader input buffer */ 70 #define CSV_INBUFSZ 1024 71 72 /* A context object used when read a CSV file. */ 73 typedef struct CsvReader CsvReader; 74 struct CsvReader { 75 FILE *in; /* Read the CSV text from this input stream */ 76 char *z; /* Accumulated text for a field */ 77 int n; /* Number of bytes in z */ 78 int nAlloc; /* Space allocated for z[] */ 79 int nLine; /* Current line number */ 80 int bNotFirst; /* True if prior text has been seen */ 81 int cTerm; /* Character that terminated the most recent field */ 82 size_t iIn; /* Next unread character in the input buffer */ 83 size_t nIn; /* Number of characters in the input buffer */ 84 char *zIn; /* The input buffer */ 85 char zErr[CSV_MXERR]; /* Error message */ 86 }; 87 88 /* Initialize a CsvReader object */ 89 static void csv_reader_init(CsvReader *p){ 90 p->in = 0; 91 p->z = 0; 92 p->n = 0; 93 p->nAlloc = 0; 94 p->nLine = 0; 95 p->bNotFirst = 0; 96 p->nIn = 0; 97 p->zIn = 0; 98 p->zErr[0] = 0; 99 } 100 101 /* Close and reset a CsvReader object */ 102 static void csv_reader_reset(CsvReader *p){ 103 if( p->in ){ 104 fclose(p->in); 105 sqlite3_free(p->zIn); 106 } 107 sqlite3_free(p->z); 108 csv_reader_init(p); 109 } 110 111 /* Report an error on a CsvReader */ 112 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ 113 va_list ap; 114 va_start(ap, zFormat); 115 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); 116 va_end(ap); 117 } 118 119 /* Open the file associated with a CsvReader 120 ** Return the number of errors. 121 */ 122 static int csv_reader_open( 123 CsvReader *p, /* The reader to open */ 124 const char *zFilename, /* Read from this filename */ 125 const char *zData /* ... or use this data */ 126 ){ 127 if( zFilename ){ 128 p->zIn = sqlite3_malloc( CSV_INBUFSZ ); 129 if( p->zIn==0 ){ 130 csv_errmsg(p, "out of memory"); 131 return 1; 132 } 133 p->in = fopen(zFilename, "rb"); 134 if( p->in==0 ){ 135 csv_reader_reset(p); 136 csv_errmsg(p, "cannot open '%s' for reading", zFilename); 137 return 1; 138 } 139 }else{ 140 assert( p->in==0 ); 141 p->zIn = (char*)zData; 142 p->nIn = strlen(zData); 143 } 144 return 0; 145 } 146 147 /* The input buffer has overflowed. Refill the input buffer, then 148 ** return the next character 149 */ 150 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ 151 size_t got; 152 153 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ 154 assert( p->in!=0 ); /* Only called if reading froma file */ 155 156 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); 157 if( got==0 ) return EOF; 158 p->nIn = got; 159 p->iIn = 1; 160 return p->zIn[0]; 161 } 162 163 /* Return the next character of input. Return EOF at end of input. */ 164 static int csv_getc(CsvReader *p){ 165 if( p->iIn >= p->nIn ){ 166 if( p->in!=0 ) return csv_getc_refill(p); 167 return EOF; 168 } 169 return ((unsigned char*)p->zIn)[p->iIn++]; 170 } 171 172 /* Increase the size of p->z and append character c to the end. 173 ** Return 0 on success and non-zero if there is an OOM error */ 174 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ 175 char *zNew; 176 int nNew = p->nAlloc*2 + 100; 177 zNew = sqlite3_realloc64(p->z, nNew); 178 if( zNew ){ 179 p->z = zNew; 180 p->nAlloc = nNew; 181 p->z[p->n++] = c; 182 return 0; 183 }else{ 184 csv_errmsg(p, "out of memory"); 185 return 1; 186 } 187 } 188 189 /* Append a single character to the CsvReader.z[] array. 190 ** Return 0 on success and non-zero if there is an OOM error */ 191 static int csv_append(CsvReader *p, char c){ 192 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); 193 p->z[p->n++] = c; 194 return 0; 195 } 196 197 /* Read a single field of CSV text. Compatible with rfc4180 and extended 198 ** with the option of having a separator other than ",". 199 ** 200 ** + Input comes from p->in. 201 ** + Store results in p->z of length p->n. Space to hold p->z comes 202 ** from sqlite3_malloc64(). 203 ** + Keep track of the line number in p->nLine. 204 ** + Store the character that terminates the field in p->cTerm. Store 205 ** EOF on end-of-file. 206 ** 207 ** Return "" at EOF. Return 0 on an OOM error. 208 */ 209 static char *csv_read_one_field(CsvReader *p){ 210 int c; 211 p->n = 0; 212 c = csv_getc(p); 213 if( c==EOF ){ 214 p->cTerm = EOF; 215 return ""; 216 } 217 if( c=='"' ){ 218 int pc, ppc; 219 int startLine = p->nLine; 220 pc = ppc = 0; 221 while( 1 ){ 222 c = csv_getc(p); 223 if( c<='"' || pc=='"' ){ 224 if( c=='\n' ) p->nLine++; 225 if( c=='"' ){ 226 if( pc=='"' ){ 227 pc = 0; 228 continue; 229 } 230 } 231 if( (c==',' && pc=='"') 232 || (c=='\n' && pc=='"') 233 || (c=='\n' && pc=='\r' && ppc=='"') 234 || (c==EOF && pc=='"') 235 ){ 236 do{ p->n--; }while( p->z[p->n]!='"' ); 237 p->cTerm = (char)c; 238 break; 239 } 240 if( pc=='"' && c!='\r' ){ 241 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); 242 break; 243 } 244 if( c==EOF ){ 245 csv_errmsg(p, "line %d: unterminated %c-quoted field\n", 246 startLine, '"'); 247 p->cTerm = (char)c; 248 break; 249 } 250 } 251 if( csv_append(p, (char)c) ) return 0; 252 ppc = pc; 253 pc = c; 254 } 255 }else{ 256 /* If this is the first field being parsed and it begins with the 257 ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ 258 if( (c&0xff)==0xef && p->bNotFirst==0 ){ 259 csv_append(p, (char)c); 260 c = csv_getc(p); 261 if( (c&0xff)==0xbb ){ 262 csv_append(p, (char)c); 263 c = csv_getc(p); 264 if( (c&0xff)==0xbf ){ 265 p->bNotFirst = 1; 266 p->n = 0; 267 return csv_read_one_field(p); 268 } 269 } 270 } 271 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ 272 if( csv_append(p, (char)c) ) return 0; 273 c = csv_getc(p); 274 } 275 if( c=='\n' ){ 276 p->nLine++; 277 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; 278 } 279 p->cTerm = (char)c; 280 } 281 if( p->z ) p->z[p->n] = 0; 282 p->bNotFirst = 1; 283 return p->z; 284 } 285 286 287 /* Forward references to the various virtual table methods implemented 288 ** in this file. */ 289 static int csvtabCreate(sqlite3*, void*, int, const char*const*, 290 sqlite3_vtab**,char**); 291 static int csvtabConnect(sqlite3*, void*, int, const char*const*, 292 sqlite3_vtab**,char**); 293 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); 294 static int csvtabDisconnect(sqlite3_vtab*); 295 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); 296 static int csvtabClose(sqlite3_vtab_cursor*); 297 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, 298 int argc, sqlite3_value **argv); 299 static int csvtabNext(sqlite3_vtab_cursor*); 300 static int csvtabEof(sqlite3_vtab_cursor*); 301 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); 302 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); 303 304 /* An instance of the CSV virtual table */ 305 typedef struct CsvTable { 306 sqlite3_vtab base; /* Base class. Must be first */ 307 char *zFilename; /* Name of the CSV file */ 308 char *zData; /* Raw CSV data in lieu of zFilename */ 309 long iStart; /* Offset to start of data in zFilename */ 310 int nCol; /* Number of columns in the CSV file */ 311 unsigned int tstFlags; /* Bit values used for testing */ 312 } CsvTable; 313 314 /* Allowed values for tstFlags */ 315 #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ 316 317 /* A cursor for the CSV virtual table */ 318 typedef struct CsvCursor { 319 sqlite3_vtab_cursor base; /* Base class. Must be first */ 320 CsvReader rdr; /* The CsvReader object */ 321 char **azVal; /* Value of the current row */ 322 int *aLen; /* Length of each entry */ 323 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ 324 } CsvCursor; 325 326 /* Transfer error message text from a reader into a CsvTable */ 327 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ 328 sqlite3_free(pTab->base.zErrMsg); 329 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); 330 } 331 332 /* 333 ** This method is the destructor fo a CsvTable object. 334 */ 335 static int csvtabDisconnect(sqlite3_vtab *pVtab){ 336 CsvTable *p = (CsvTable*)pVtab; 337 sqlite3_free(p->zFilename); 338 sqlite3_free(p->zData); 339 sqlite3_free(p); 340 return SQLITE_OK; 341 } 342 343 /* Skip leading whitespace. Return a pointer to the first non-whitespace 344 ** character, or to the zero terminator if the string has only whitespace */ 345 static const char *csv_skip_whitespace(const char *z){ 346 while( isspace((unsigned char)z[0]) ) z++; 347 return z; 348 } 349 350 /* Remove trailing whitespace from the end of string z[] */ 351 static void csv_trim_whitespace(char *z){ 352 size_t n = strlen(z); 353 while( n>0 && isspace((unsigned char)z[n]) ) n--; 354 z[n] = 0; 355 } 356 357 /* Dequote the string */ 358 static void csv_dequote(char *z){ 359 int j; 360 char cQuote = z[0]; 361 size_t i, n; 362 363 if( cQuote!='\'' && cQuote!='"' ) return; 364 n = strlen(z); 365 if( n<2 || z[n-1]!=z[0] ) return; 366 for(i=1, j=0; i<n-1; i++){ 367 if( z[i]==cQuote && z[i+1]==cQuote ) i++; 368 z[j++] = z[i]; 369 } 370 z[j] = 0; 371 } 372 373 /* Check to see if the string is of the form: "TAG = VALUE" with optional 374 ** whitespace before and around tokens. If it is, return a pointer to the 375 ** first character of VALUE. If it is not, return NULL. 376 */ 377 static const char *csv_parameter(const char *zTag, int nTag, const char *z){ 378 z = csv_skip_whitespace(z); 379 if( strncmp(zTag, z, nTag)!=0 ) return 0; 380 z = csv_skip_whitespace(z+nTag); 381 if( z[0]!='=' ) return 0; 382 return csv_skip_whitespace(z+1); 383 } 384 385 /* Decode a parameter that requires a dequoted string. 386 ** 387 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned 388 ** even if there is an error. If an error occurs, then an error message 389 ** is left in p->zErr. If there are no errors, p->zErr[0]==0. 390 */ 391 static int csv_string_parameter( 392 CsvReader *p, /* Leave the error message here, if there is one */ 393 const char *zParam, /* Parameter we are checking for */ 394 const char *zArg, /* Raw text of the virtual table argment */ 395 char **pzVal /* Write the dequoted string value here */ 396 ){ 397 const char *zValue; 398 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg); 399 if( zValue==0 ) return 0; 400 p->zErr[0] = 0; 401 if( *pzVal ){ 402 csv_errmsg(p, "more than one '%s' parameter", zParam); 403 return 1; 404 } 405 *pzVal = sqlite3_mprintf("%s", zValue); 406 if( *pzVal==0 ){ 407 csv_errmsg(p, "out of memory"); 408 return 1; 409 } 410 csv_trim_whitespace(*pzVal); 411 csv_dequote(*pzVal); 412 return 1; 413 } 414 415 416 /* Return 0 if the argument is false and 1 if it is true. Return -1 if 417 ** we cannot really tell. 418 */ 419 static int csv_boolean(const char *z){ 420 if( sqlite3_stricmp("yes",z)==0 421 || sqlite3_stricmp("on",z)==0 422 || sqlite3_stricmp("true",z)==0 423 || (z[0]=='1' && z[1]==0) 424 ){ 425 return 1; 426 } 427 if( sqlite3_stricmp("no",z)==0 428 || sqlite3_stricmp("off",z)==0 429 || sqlite3_stricmp("false",z)==0 430 || (z[0]=='0' && z[1]==0) 431 ){ 432 return 0; 433 } 434 return -1; 435 } 436 437 438 /* 439 ** Parameters: 440 ** filename=FILENAME Name of file containing CSV content 441 ** data=TEXT Direct CSV content. 442 ** schema=SCHEMA Alternative CSV schema. 443 ** header=YES|NO First row of CSV defines the names of 444 ** columns if "yes". Default "no". 445 ** columns=N Assume the CSV file contains N columns. 446 ** 447 ** Only available if compiled with SQLITE_TEST: 448 ** 449 ** testflags=N Bitmask of test flags. Optional 450 ** 451 ** If schema= is omitted, then the columns are named "c0", "c1", "c2", 452 ** and so forth. If columns=N is omitted, then the file is opened and 453 ** the number of columns in the first row is counted to determine the 454 ** column count. If header=YES, then the first row is skipped. 455 */ 456 static int csvtabConnect( 457 sqlite3 *db, 458 void *pAux, 459 int argc, const char *const*argv, 460 sqlite3_vtab **ppVtab, 461 char **pzErr 462 ){ 463 CsvTable *pNew = 0; /* The CsvTable object to construct */ 464 int bHeader = -1; /* header= flags. -1 means not seen yet */ 465 int rc = SQLITE_OK; /* Result code from this routine */ 466 int i, j; /* Loop counters */ 467 #ifdef SQLITE_TEST 468 int tstFlags = 0; /* Value for testflags=N parameter */ 469 #endif 470 int nCol = -99; /* Value of the columns= parameter */ 471 CsvReader sRdr; /* A CSV file reader used to store an error 472 ** message and/or to count the number of columns */ 473 static const char *azParam[] = { 474 "filename", "data", "schema", 475 }; 476 char *azPValue[3]; /* Parameter values */ 477 # define CSV_FILENAME (azPValue[0]) 478 # define CSV_DATA (azPValue[1]) 479 # define CSV_SCHEMA (azPValue[2]) 480 481 482 assert( sizeof(azPValue)==sizeof(azParam) ); 483 memset(&sRdr, 0, sizeof(sRdr)); 484 memset(azPValue, 0, sizeof(azPValue)); 485 for(i=3; i<argc; i++){ 486 const char *z = argv[i]; 487 const char *zValue; 488 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ 489 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; 490 } 491 if( j<sizeof(azParam)/sizeof(azParam[0]) ){ 492 if( sRdr.zErr[0] ) goto csvtab_connect_error; 493 }else 494 if( (zValue = csv_parameter("header",6,z))!=0 ){ 495 int x; 496 if( bHeader>=0 ){ 497 csv_errmsg(&sRdr, "more than one 'header' parameter"); 498 goto csvtab_connect_error; 499 } 500 x = csv_boolean(zValue); 501 if( x==1 ){ 502 bHeader = 1; 503 }else if( x==0 ){ 504 bHeader = 0; 505 }else{ 506 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); 507 goto csvtab_connect_error; 508 } 509 }else 510 #ifdef SQLITE_TEST 511 if( (zValue = csv_parameter("testflags",9,z))!=0 ){ 512 tstFlags = (unsigned int)atoi(zValue); 513 }else 514 #endif 515 if( (zValue = csv_parameter("columns",7,z))!=0 ){ 516 if( nCol>0 ){ 517 csv_errmsg(&sRdr, "more than one 'columns' parameter"); 518 goto csvtab_connect_error; 519 } 520 nCol = atoi(zValue); 521 if( nCol<=0 ){ 522 csv_errmsg(&sRdr, "must have at least one column"); 523 goto csvtab_connect_error; 524 } 525 }else 526 { 527 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); 528 goto csvtab_connect_error; 529 } 530 } 531 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ 532 csv_errmsg(&sRdr, "must either filename= or data= but not both"); 533 goto csvtab_connect_error; 534 } 535 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ 536 goto csvtab_connect_error; 537 } 538 pNew = sqlite3_malloc( sizeof(*pNew) ); 539 *ppVtab = (sqlite3_vtab*)pNew; 540 if( pNew==0 ) goto csvtab_connect_oom; 541 memset(pNew, 0, sizeof(*pNew)); 542 if( nCol>0 ){ 543 pNew->nCol = nCol; 544 }else{ 545 do{ 546 const char *z = csv_read_one_field(&sRdr); 547 if( z==0 ) goto csvtab_connect_oom; 548 pNew->nCol++; 549 }while( sRdr.cTerm==',' ); 550 } 551 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; 552 pNew->zData = CSV_DATA; CSV_DATA = 0; 553 #ifdef SQLITE_TEST 554 pNew->tstFlags = tstFlags; 555 #endif 556 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; 557 csv_reader_reset(&sRdr); 558 if( CSV_SCHEMA==0 ){ 559 char *zSep = ""; 560 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); 561 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; 562 for(i=0; i<pNew->nCol; i++){ 563 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); 564 zSep = ","; 565 } 566 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); 567 } 568 rc = sqlite3_declare_vtab(db, CSV_SCHEMA); 569 if( rc ) goto csvtab_connect_error; 570 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 571 sqlite3_free(azPValue[i]); 572 } 573 return SQLITE_OK; 574 575 csvtab_connect_oom: 576 rc = SQLITE_NOMEM; 577 csv_errmsg(&sRdr, "out of memory"); 578 579 csvtab_connect_error: 580 if( pNew ) csvtabDisconnect(&pNew->base); 581 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 582 sqlite3_free(azPValue[i]); 583 } 584 if( sRdr.zErr[0] ){ 585 sqlite3_free(*pzErr); 586 *pzErr = sqlite3_mprintf("%s", sRdr.zErr); 587 } 588 csv_reader_reset(&sRdr); 589 if( rc==SQLITE_OK ) rc = SQLITE_ERROR; 590 return rc; 591 } 592 593 /* 594 ** Reset the current row content held by a CsvCursor. 595 */ 596 static void csvtabCursorRowReset(CsvCursor *pCur){ 597 CsvTable *pTab = (CsvTable*)pCur->base.pVtab; 598 int i; 599 for(i=0; i<pTab->nCol; i++){ 600 sqlite3_free(pCur->azVal[i]); 601 pCur->azVal[i] = 0; 602 pCur->aLen[i] = 0; 603 } 604 } 605 606 /* 607 ** The xConnect and xCreate methods do the same thing, but they must be 608 ** different so that the virtual table is not an eponymous virtual table. 609 */ 610 static int csvtabCreate( 611 sqlite3 *db, 612 void *pAux, 613 int argc, const char *const*argv, 614 sqlite3_vtab **ppVtab, 615 char **pzErr 616 ){ 617 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); 618 } 619 620 /* 621 ** Destructor for a CsvCursor. 622 */ 623 static int csvtabClose(sqlite3_vtab_cursor *cur){ 624 CsvCursor *pCur = (CsvCursor*)cur; 625 csvtabCursorRowReset(pCur); 626 csv_reader_reset(&pCur->rdr); 627 sqlite3_free(cur); 628 return SQLITE_OK; 629 } 630 631 /* 632 ** Constructor for a new CsvTable cursor object. 633 */ 634 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ 635 CsvTable *pTab = (CsvTable*)p; 636 CsvCursor *pCur; 637 size_t nByte; 638 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; 639 pCur = sqlite3_malloc64( nByte ); 640 if( pCur==0 ) return SQLITE_NOMEM; 641 memset(pCur, 0, nByte); 642 pCur->azVal = (char**)&pCur[1]; 643 pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; 644 *ppCursor = &pCur->base; 645 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ 646 csv_xfer_error(pTab, &pCur->rdr); 647 return SQLITE_ERROR; 648 } 649 return SQLITE_OK; 650 } 651 652 653 /* 654 ** Advance a CsvCursor to its next row of input. 655 ** Set the EOF marker if we reach the end of input. 656 */ 657 static int csvtabNext(sqlite3_vtab_cursor *cur){ 658 CsvCursor *pCur = (CsvCursor*)cur; 659 CsvTable *pTab = (CsvTable*)cur->pVtab; 660 int i = 0; 661 char *z; 662 do{ 663 z = csv_read_one_field(&pCur->rdr); 664 if( z==0 ){ 665 csv_xfer_error(pTab, &pCur->rdr); 666 break; 667 } 668 if( i<pTab->nCol ){ 669 if( pCur->aLen[i] < pCur->rdr.n+1 ){ 670 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1); 671 if( zNew==0 ){ 672 csv_errmsg(&pCur->rdr, "out of memory"); 673 csv_xfer_error(pTab, &pCur->rdr); 674 break; 675 } 676 pCur->azVal[i] = zNew; 677 pCur->aLen[i] = pCur->rdr.n+1; 678 } 679 memcpy(pCur->azVal[i], z, pCur->rdr.n+1); 680 i++; 681 } 682 }while( pCur->rdr.cTerm==',' ); 683 if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){ 684 pCur->iRowid = -1; 685 }else{ 686 pCur->iRowid++; 687 while( i<pTab->nCol ){ 688 sqlite3_free(pCur->azVal[i]); 689 pCur->azVal[i] = 0; 690 pCur->aLen[i] = 0; 691 i++; 692 } 693 } 694 return SQLITE_OK; 695 } 696 697 /* 698 ** Return values of columns for the row at which the CsvCursor 699 ** is currently pointing. 700 */ 701 static int csvtabColumn( 702 sqlite3_vtab_cursor *cur, /* The cursor */ 703 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ 704 int i /* Which column to return */ 705 ){ 706 CsvCursor *pCur = (CsvCursor*)cur; 707 CsvTable *pTab = (CsvTable*)cur->pVtab; 708 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ 709 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); 710 } 711 return SQLITE_OK; 712 } 713 714 /* 715 ** Return the rowid for the current row. 716 */ 717 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ 718 CsvCursor *pCur = (CsvCursor*)cur; 719 *pRowid = pCur->iRowid; 720 return SQLITE_OK; 721 } 722 723 /* 724 ** Return TRUE if the cursor has been moved off of the last 725 ** row of output. 726 */ 727 static int csvtabEof(sqlite3_vtab_cursor *cur){ 728 CsvCursor *pCur = (CsvCursor*)cur; 729 return pCur->iRowid<0; 730 } 731 732 /* 733 ** Only a full table scan is supported. So xFilter simply rewinds to 734 ** the beginning. 735 */ 736 static int csvtabFilter( 737 sqlite3_vtab_cursor *pVtabCursor, 738 int idxNum, const char *idxStr, 739 int argc, sqlite3_value **argv 740 ){ 741 CsvCursor *pCur = (CsvCursor*)pVtabCursor; 742 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; 743 pCur->iRowid = 0; 744 if( pCur->rdr.in==0 ){ 745 assert( pCur->rdr.zIn==pTab->zData ); 746 assert( pTab->iStart>=0 ); 747 assert( (size_t)pTab->iStart<=pCur->rdr.nIn ); 748 pCur->rdr.iIn = pTab->iStart; 749 }else{ 750 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); 751 pCur->rdr.iIn = 0; 752 pCur->rdr.nIn = 0; 753 } 754 return csvtabNext(pVtabCursor); 755 } 756 757 /* 758 ** Only a forward full table scan is supported. xBestIndex is mostly 759 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality 760 ** constraints lowers the estimated cost, which is fiction, but is useful 761 ** for testing certain kinds of virtual table behavior. 762 */ 763 static int csvtabBestIndex( 764 sqlite3_vtab *tab, 765 sqlite3_index_info *pIdxInfo 766 ){ 767 pIdxInfo->estimatedCost = 1000000; 768 #ifdef SQLITE_TEST 769 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ 770 /* The usual (and sensible) case is to always do a full table scan. 771 ** The code in this branch only runs when testflags=1. This code 772 ** generates an artifical and unrealistic plan which is useful 773 ** for testing virtual table logic but is not helpful to real applications. 774 ** 775 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual 776 ** table (even though it is not) and the cost of running the virtual table 777 ** is reduced from 1 million to just 10. The constraints are *not* marked 778 ** as omittable, however, so the query planner should still generate a 779 ** plan that gives a correct answer, even if they plan is not optimal. 780 */ 781 int i; 782 int nConst = 0; 783 for(i=0; i<pIdxInfo->nConstraint; i++){ 784 unsigned char op; 785 if( pIdxInfo->aConstraint[i].usable==0 ) continue; 786 op = pIdxInfo->aConstraint[i].op; 787 if( op==SQLITE_INDEX_CONSTRAINT_EQ 788 || op==SQLITE_INDEX_CONSTRAINT_LIKE 789 || op==SQLITE_INDEX_CONSTRAINT_GLOB 790 ){ 791 pIdxInfo->estimatedCost = 10; 792 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; 793 nConst++; 794 } 795 } 796 } 797 #endif 798 return SQLITE_OK; 799 } 800 801 802 static sqlite3_module CsvModule = { 803 0, /* iVersion */ 804 csvtabCreate, /* xCreate */ 805 csvtabConnect, /* xConnect */ 806 csvtabBestIndex, /* xBestIndex */ 807 csvtabDisconnect, /* xDisconnect */ 808 csvtabDisconnect, /* xDestroy */ 809 csvtabOpen, /* xOpen - open a cursor */ 810 csvtabClose, /* xClose - close a cursor */ 811 csvtabFilter, /* xFilter - configure scan constraints */ 812 csvtabNext, /* xNext - advance a cursor */ 813 csvtabEof, /* xEof - check for end of scan */ 814 csvtabColumn, /* xColumn - read data */ 815 csvtabRowid, /* xRowid - read data */ 816 0, /* xUpdate */ 817 0, /* xBegin */ 818 0, /* xSync */ 819 0, /* xCommit */ 820 0, /* xRollback */ 821 0, /* xFindMethod */ 822 0, /* xRename */ 823 }; 824 825 #ifdef SQLITE_TEST 826 /* 827 ** For virtual table testing, make a version of the CSV virtual table 828 ** available that has an xUpdate function. But the xUpdate always returns 829 ** SQLITE_READONLY since the CSV file is not really writable. 830 */ 831 static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){ 832 return SQLITE_READONLY; 833 } 834 static sqlite3_module CsvModuleFauxWrite = { 835 0, /* iVersion */ 836 csvtabCreate, /* xCreate */ 837 csvtabConnect, /* xConnect */ 838 csvtabBestIndex, /* xBestIndex */ 839 csvtabDisconnect, /* xDisconnect */ 840 csvtabDisconnect, /* xDestroy */ 841 csvtabOpen, /* xOpen - open a cursor */ 842 csvtabClose, /* xClose - close a cursor */ 843 csvtabFilter, /* xFilter - configure scan constraints */ 844 csvtabNext, /* xNext - advance a cursor */ 845 csvtabEof, /* xEof - check for end of scan */ 846 csvtabColumn, /* xColumn - read data */ 847 csvtabRowid, /* xRowid - read data */ 848 csvtabUpdate, /* xUpdate */ 849 0, /* xBegin */ 850 0, /* xSync */ 851 0, /* xCommit */ 852 0, /* xRollback */ 853 0, /* xFindMethod */ 854 0, /* xRename */ 855 }; 856 #endif /* SQLITE_TEST */ 857 858 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ 859 860 861 #ifdef _WIN32 862 __declspec(dllexport) 863 #endif 864 /* 865 ** This routine is called when the extension is loaded. The new 866 ** CSV virtual table module is registered with the calling database 867 ** connection. 868 */ 869 int sqlite3_csv_init( 870 sqlite3 *db, 871 char **pzErrMsg, 872 const sqlite3_api_routines *pApi 873 ){ 874 #ifndef SQLITE_OMIT_VIRTUALTABLE 875 int rc; 876 SQLITE_EXTENSION_INIT2(pApi); 877 rc = sqlite3_create_module(db, "csv", &CsvModule, 0); 878 #ifdef SQLITE_TEST 879 if( rc==SQLITE_OK ){ 880 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); 881 } 882 #endif 883 return rc; 884 #else 885 return SQLITE_OK; 886 #endif 887 } 888