1 /* 2 ** 2016-05-28 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the implementation of an SQLite virtual table for 14 ** reading CSV files. 15 ** 16 ** Usage: 17 ** 18 ** .load ./csv 19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); 20 ** SELECT * FROM csv; 21 ** 22 ** The columns are named "c1", "c2", "c3", ... by default. But the 23 ** application can define its own CREATE TABLE statement as an additional 24 ** parameter. For example: 25 ** 26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv( 27 ** filename = "../http.log", 28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" 29 ** ); 30 ** 31 ** Instead of specifying a file, the text of the CSV can be loaded using 32 ** the data= parameter. 33 ** 34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have 35 ** N columns. If the columns parameter is omitted, the CSV file is opened 36 ** as soon as the virtual table is constructed and the first row of the CSV 37 ** is read in order to count the tables. 38 ** 39 ** Some extra debugging features (used for testing virtual tables) are available 40 ** if this module is compiled with -DSQLITE_TEST. 41 */ 42 #include <sqlite3ext.h> 43 SQLITE_EXTENSION_INIT1 44 #include <string.h> 45 #include <stdlib.h> 46 #include <assert.h> 47 #include <stdarg.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 /* 52 ** A macro to hint to the compiler that a function should not be 53 ** inlined. 
54 */ 55 #if defined(__GNUC__) 56 # define CSV_NOINLINE __attribute__((noinline)) 57 #elif defined(_MSC_VER) && _MSC_VER>=1310 58 # define CSV_NOINLINE __declspec(noinline) 59 #else 60 # define CSV_NOINLINE 61 #endif 62 63 64 /* Max size of the error message in a CsvReader */ 65 #define CSV_MXERR 200 66 67 /* Size of the CsvReader input buffer */ 68 #define CSV_INBUFSZ 1024 69 70 /* A context object used when read a CSV file. */ 71 typedef struct CsvReader CsvReader; 72 struct CsvReader { 73 FILE *in; /* Read the CSV text from this input stream */ 74 char *z; /* Accumulated text for a field */ 75 int n; /* Number of bytes in z */ 76 int nAlloc; /* Space allocated for z[] */ 77 int nLine; /* Current line number */ 78 char cTerm; /* Character that terminated the most recent field */ 79 size_t iIn; /* Next unread character in the input buffer */ 80 size_t nIn; /* Number of characters in the input buffer */ 81 char *zIn; /* The input buffer */ 82 char zErr[CSV_MXERR]; /* Error message */ 83 }; 84 85 /* Initialize a CsvReader object */ 86 static void csv_reader_init(CsvReader *p){ 87 p->in = 0; 88 p->z = 0; 89 p->n = 0; 90 p->nAlloc = 0; 91 p->nLine = 0; 92 p->nIn = 0; 93 p->zIn = 0; 94 p->zErr[0] = 0; 95 } 96 97 /* Close and reset a CsvReader object */ 98 static void csv_reader_reset(CsvReader *p){ 99 if( p->in ){ 100 fclose(p->in); 101 sqlite3_free(p->zIn); 102 } 103 sqlite3_free(p->z); 104 csv_reader_init(p); 105 } 106 107 /* Report an error on a CsvReader */ 108 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ 109 va_list ap; 110 va_start(ap, zFormat); 111 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); 112 va_end(ap); 113 } 114 115 /* Open the file associated with a CsvReader 116 ** Return the number of errors. 117 */ 118 static int csv_reader_open( 119 CsvReader *p, /* The reader to open */ 120 const char *zFilename, /* Read from this filename */ 121 const char *zData /* ... 
or use this data */ 122 ){ 123 if( zFilename ){ 124 p->zIn = sqlite3_malloc( CSV_INBUFSZ ); 125 if( p->zIn==0 ){ 126 csv_errmsg(p, "out of memory"); 127 return 1; 128 } 129 p->in = fopen(zFilename, "rb"); 130 if( p->in==0 ){ 131 csv_reader_reset(p); 132 csv_errmsg(p, "cannot open '%s' for reading", zFilename); 133 return 1; 134 } 135 }else{ 136 assert( p->in==0 ); 137 p->zIn = (char*)zData; 138 p->nIn = strlen(zData); 139 } 140 return 0; 141 } 142 143 /* The input buffer has overflowed. Refill the input buffer, then 144 ** return the next character 145 */ 146 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ 147 size_t got; 148 149 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ 150 assert( p->in!=0 ); /* Only called if reading froma file */ 151 152 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); 153 if( got==0 ) return EOF; 154 p->nIn = got; 155 p->iIn = 1; 156 return p->zIn[0]; 157 } 158 159 /* Return the next character of input. Return EOF at end of input. */ 160 static int csv_getc(CsvReader *p){ 161 if( p->iIn >= p->nIn ){ 162 if( p->in!=0 ) return csv_getc_refill(p); 163 return EOF; 164 } 165 return p->zIn[p->iIn++]; 166 } 167 168 /* Increase the size of p->z and append character c to the end. 169 ** Return 0 on success and non-zero if there is an OOM error */ 170 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ 171 char *zNew; 172 int nNew = p->nAlloc*2 + 100; 173 zNew = sqlite3_realloc64(p->z, nNew); 174 if( zNew ){ 175 p->z = zNew; 176 p->nAlloc = nNew; 177 p->z[p->n++] = c; 178 return 0; 179 }else{ 180 csv_errmsg(p, "out of memory"); 181 return 1; 182 } 183 } 184 185 /* Append a single character to the CsvReader.z[] array. 186 ** Return 0 on success and non-zero if there is an OOM error */ 187 static int csv_append(CsvReader *p, char c){ 188 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); 189 p->z[p->n++] = c; 190 return 0; 191 } 192 193 /* Read a single field of CSV text. 
Compatible with rfc4180 and extended
** with the option of having a separator other than ",".
**
**   +  Input comes from the reader p, one character at a time through
**      csv_getc().
**   +  Store results in p->z of length p->n.  Space to hold p->z comes
**      from sqlite3_malloc64().
**   +  Keep track of the line number in p->nLine.
**   +  Store the character that terminates the field in p->cTerm.  Store
**      EOF on end-of-file.
**
** Return "" at EOF.  Return 0 on an OOM error.
**
** A field that is present but empty is returned as a zero-length string
** held in p->z, never as NULL, so that NULL always means OOM.
**
** If a quoting error is found, a message is left in p->zErr and the text
** accumulated so far is returned.
*/
static char *csv_read_one_field(CsvReader *p){
  int c;
  p->n = 0;
  c = csv_getc(p);
  if( c==EOF ){
    p->cTerm = EOF;
    return "";
  }
  if( c=='"' ){
    /* A quoted field.  pc and ppc hold the previous and the
    ** previous-but-one characters so that the closing quote, optionally
    ** followed by \r, can be recognized when the terminator is seen. */
    int pc, ppc;
    int startLine = p->nLine;
    pc = ppc = 0;
    while( 1 ){
      c = csv_getc(p);
      /* Only characters that can be special (everything up to and including
      ** the double-quote, plus whatever follows a double-quote) take the
      ** slow path. */
      if( c<='"' || pc=='"' ){
        if( c=='\n' ) p->nLine++;
        if( c=='"' ){
          if( pc=='"' ){
            /* A doubled quote stands for one literal quote, which has
            ** already been appended. */
            pc = 0;
            continue;
          }
        }
        if( (c==',' && pc=='"')
         || (c=='\n' && pc=='"')
         || (c=='\n' && pc=='\r' && ppc=='"')
         || (c==EOF && pc=='"')
        ){
          /* End of field.  Discard the closing quote and anything after it
          ** (a trailing \r) from the accumulated text. */
          do{ p->n--; }while( p->z[p->n]!='"' );
          p->cTerm = c;
          break;
        }
        if( pc=='"' && c!='\r' ){
          csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
          break;
        }
        if( c==EOF ){
          csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
                     startLine, '"');
          p->cTerm = c;
          break;
        }
      }
      if( csv_append(p, (char)c) ) return 0;
      ppc = pc;
      pc = c;
    }
  }else{
    /* An unquoted field runs up to the next comma, newline or EOF */
    while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
      if( csv_append(p, (char)c) ) return 0;
      c = csv_getc(p);
    }
    if( c=='\n' ){
      p->nLine++;
      if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
    }
    p->cTerm = c;
  }
  if( p->z==0 ){
    /* Nothing has ever been appended to this reader, which happens when the
    ** very first field is empty.  Allocate the buffer now so that the empty
    ** field is returned as "" rather than as NULL, which callers would
    ** interpret as an out-of-memory error. */
    if( csv_append(p, 0) ) return 0;
    p->n = 0;
  }
  p->z[p->n] = 0;
  return p->z;
}


/* Forward references to the various virtual table methods implemented
** in this file.
*/ 269 static int csvtabCreate(sqlite3*, void*, int, const char*const*, 270 sqlite3_vtab**,char**); 271 static int csvtabConnect(sqlite3*, void*, int, const char*const*, 272 sqlite3_vtab**,char**); 273 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); 274 static int csvtabDisconnect(sqlite3_vtab*); 275 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); 276 static int csvtabClose(sqlite3_vtab_cursor*); 277 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, 278 int argc, sqlite3_value **argv); 279 static int csvtabNext(sqlite3_vtab_cursor*); 280 static int csvtabEof(sqlite3_vtab_cursor*); 281 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); 282 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); 283 284 /* An instance of the CSV virtual table */ 285 typedef struct CsvTable { 286 sqlite3_vtab base; /* Base class. Must be first */ 287 char *zFilename; /* Name of the CSV file */ 288 char *zData; /* Raw CSV data in lieu of zFilename */ 289 long iStart; /* Offset to start of data in zFilename */ 290 int nCol; /* Number of columns in the CSV file */ 291 unsigned int tstFlags; /* Bit values used for testing */ 292 } CsvTable; 293 294 /* Allowed values for tstFlags */ 295 #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ 296 297 /* A cursor for the CSV virtual table */ 298 typedef struct CsvCursor { 299 sqlite3_vtab_cursor base; /* Base class. Must be first */ 300 CsvReader rdr; /* The CsvReader object */ 301 char **azVal; /* Value of the current row */ 302 int *aLen; /* Length of each entry */ 303 sqlite3_int64 iRowid; /* The current rowid. 
Negative for EOF */ 304 } CsvCursor; 305 306 /* Transfer error message text from a reader into a CsvTable */ 307 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ 308 sqlite3_free(pTab->base.zErrMsg); 309 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); 310 } 311 312 /* 313 ** This method is the destructor fo a CsvTable object. 314 */ 315 static int csvtabDisconnect(sqlite3_vtab *pVtab){ 316 CsvTable *p = (CsvTable*)pVtab; 317 sqlite3_free(p->zFilename); 318 sqlite3_free(p->zData); 319 sqlite3_free(p); 320 return SQLITE_OK; 321 } 322 323 /* Skip leading whitespace. Return a pointer to the first non-whitespace 324 ** character, or to the zero terminator if the string has only whitespace */ 325 static const char *csv_skip_whitespace(const char *z){ 326 while( isspace((unsigned char)z[0]) ) z++; 327 return z; 328 } 329 330 /* Remove trailing whitespace from the end of string z[] */ 331 static void csv_trim_whitespace(char *z){ 332 size_t n = strlen(z); 333 while( n>0 && isspace((unsigned char)z[n]) ) n--; 334 z[n] = 0; 335 } 336 337 /* Dequote the string */ 338 static void csv_dequote(char *z){ 339 int i, j; 340 char cQuote = z[0]; 341 size_t n; 342 343 if( cQuote!='\'' && cQuote!='"' ) return; 344 n = strlen(z); 345 if( n<2 || z[n-1]!=z[0] ) return; 346 for(i=1, j=0; i<n-1; i++){ 347 if( z[i]==cQuote && z[i+1]==cQuote ) i++; 348 z[j++] = z[i]; 349 } 350 z[j] = 0; 351 } 352 353 /* Check to see if the string is of the form: "TAG = VALUE" with optional 354 ** whitespace before and around tokens. If it is, return a pointer to the 355 ** first character of VALUE. If it is not, return NULL. 356 */ 357 static const char *csv_parameter(const char *zTag, int nTag, const char *z){ 358 z = csv_skip_whitespace(z); 359 if( strncmp(zTag, z, nTag)!=0 ) return 0; 360 z = csv_skip_whitespace(z+nTag); 361 if( z[0]!='=' ) return 0; 362 return csv_skip_whitespace(z+1); 363 } 364 365 /* Decode a parameter that requires a dequoted string. 
366 ** 367 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned 368 ** even if there is an error. If an error occurs, then an error message 369 ** is left in p->zErr. If there are no errors, p->zErr[0]==0. 370 */ 371 static int csv_string_parameter( 372 CsvReader *p, /* Leave the error message here, if there is one */ 373 const char *zParam, /* Parameter we are checking for */ 374 const char *zArg, /* Raw text of the virtual table argment */ 375 char **pzVal /* Write the dequoted string value here */ 376 ){ 377 const char *zValue; 378 zValue = csv_parameter(zParam,strlen(zParam),zArg); 379 if( zValue==0 ) return 0; 380 p->zErr[0] = 0; 381 if( *pzVal ){ 382 csv_errmsg(p, "more than one '%s' parameter", zParam); 383 return 1; 384 } 385 *pzVal = sqlite3_mprintf("%s", zValue); 386 if( *pzVal==0 ){ 387 csv_errmsg(p, "out of memory"); 388 return 1; 389 } 390 csv_trim_whitespace(*pzVal); 391 csv_dequote(*pzVal); 392 return 1; 393 } 394 395 396 /* Return 0 if the argument is false and 1 if it is true. Return -1 if 397 ** we cannot really tell. 398 */ 399 static int csv_boolean(const char *z){ 400 if( sqlite3_stricmp("yes",z)==0 401 || sqlite3_stricmp("on",z)==0 402 || sqlite3_stricmp("true",z)==0 403 || (z[0]=='1' && z[0]==0) 404 ){ 405 return 1; 406 } 407 if( sqlite3_stricmp("no",z)==0 408 || sqlite3_stricmp("off",z)==0 409 || sqlite3_stricmp("false",z)==0 410 || (z[0]=='0' && z[1]==0) 411 ){ 412 return 0; 413 } 414 return -1; 415 } 416 417 418 /* 419 ** Parameters: 420 ** filename=FILENAME Name of file containing CSV content 421 ** data=TEXT Direct CSV content. 422 ** schema=SCHEMA Alternative CSV schema. 423 ** header=YES|NO First row of CSV defines the names of 424 ** columns if "yes". Default "no". 425 ** columns=N Assume the CSV file contains N columns. 426 ** 427 ** Only available if compiled with SQLITE_TEST: 428 ** 429 ** testflags=N Bitmask of test flags. 
Optional 430 ** 431 ** If schema= is omitted, then the columns are named "c0", "c1", "c2", 432 ** and so forth. If columns=N is omitted, then the file is opened and 433 ** the number of columns in the first row is counted to determine the 434 ** column count. If header=YES, then the first row is skipped. 435 */ 436 static int csvtabConnect( 437 sqlite3 *db, 438 void *pAux, 439 int argc, const char *const*argv, 440 sqlite3_vtab **ppVtab, 441 char **pzErr 442 ){ 443 CsvTable *pNew = 0; /* The CsvTable object to construct */ 444 int bHeader = -1; /* header= flags. -1 means not seen yet */ 445 int rc = SQLITE_OK; /* Result code from this routine */ 446 int i, j; /* Loop counters */ 447 #ifdef SQLITE_TEST 448 int tstFlags = 0; /* Value for testflags=N parameter */ 449 #endif 450 int nCol = -99; /* Value of the columns= parameter */ 451 CsvReader sRdr; /* A CSV file reader used to store an error 452 ** message and/or to count the number of columns */ 453 static const char *azParam[] = { 454 "filename", "data", "schema", 455 }; 456 char *azPValue[3]; /* Parameter values */ 457 # define CSV_FILENAME (azPValue[0]) 458 # define CSV_DATA (azPValue[1]) 459 # define CSV_SCHEMA (azPValue[2]) 460 461 462 assert( sizeof(azPValue)==sizeof(azParam) ); 463 memset(&sRdr, 0, sizeof(sRdr)); 464 memset(azPValue, 0, sizeof(azPValue)); 465 for(i=3; i<argc; i++){ 466 const char *z = argv[i]; 467 const char *zValue; 468 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ 469 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; 470 } 471 if( j<sizeof(azParam)/sizeof(azParam[0]) ){ 472 if( sRdr.zErr[0] ) goto csvtab_connect_error; 473 }else 474 if( (zValue = csv_parameter("header",6,z))!=0 ){ 475 int x; 476 if( bHeader>=0 ){ 477 csv_errmsg(&sRdr, "more than one 'header' parameter"); 478 goto csvtab_connect_error; 479 } 480 x = csv_boolean(zValue); 481 if( x==1 ){ 482 bHeader = 1; 483 }else if( x==0 ){ 484 bHeader = 0; 485 }else{ 486 csv_errmsg(&sRdr, "unrecognized argument to 
'header': %s", zValue); 487 goto csvtab_connect_error; 488 } 489 }else 490 #ifdef SQLITE_TEST 491 if( (zValue = csv_parameter("testflags",9,z))!=0 ){ 492 tstFlags = (unsigned int)atoi(zValue); 493 }else 494 #endif 495 if( (zValue = csv_parameter("columns",7,z))!=0 ){ 496 if( nCol>0 ){ 497 csv_errmsg(&sRdr, "more than one 'columns' parameter"); 498 goto csvtab_connect_error; 499 } 500 nCol = atoi(zValue); 501 if( nCol<=0 ){ 502 csv_errmsg(&sRdr, "must have at least one column"); 503 goto csvtab_connect_error; 504 } 505 }else 506 { 507 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); 508 goto csvtab_connect_error; 509 } 510 } 511 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ 512 csv_errmsg(&sRdr, "must either filename= or data= but not both"); 513 goto csvtab_connect_error; 514 } 515 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ 516 goto csvtab_connect_error; 517 } 518 pNew = sqlite3_malloc( sizeof(*pNew) ); 519 *ppVtab = (sqlite3_vtab*)pNew; 520 if( pNew==0 ) goto csvtab_connect_oom; 521 memset(pNew, 0, sizeof(*pNew)); 522 if( nCol>0 ){ 523 pNew->nCol = nCol; 524 }else{ 525 do{ 526 const char *z = csv_read_one_field(&sRdr); 527 if( z==0 ) goto csvtab_connect_oom; 528 pNew->nCol++; 529 }while( sRdr.cTerm==',' ); 530 } 531 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; 532 pNew->zData = CSV_DATA; CSV_DATA = 0; 533 #ifdef SQLITE_TEST 534 pNew->tstFlags = tstFlags; 535 #endif 536 pNew->iStart = bHeader==1 ? 
ftell(sRdr.in) : 0; 537 csv_reader_reset(&sRdr); 538 if( CSV_SCHEMA==0 ){ 539 char *zSep = ""; 540 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); 541 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; 542 for(i=0; i<pNew->nCol; i++){ 543 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); 544 zSep = ","; 545 } 546 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); 547 } 548 rc = sqlite3_declare_vtab(db, CSV_SCHEMA); 549 if( rc ) goto csvtab_connect_error; 550 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 551 sqlite3_free(azPValue[i]); 552 } 553 return SQLITE_OK; 554 555 csvtab_connect_oom: 556 rc = SQLITE_NOMEM; 557 csv_errmsg(&sRdr, "out of memory"); 558 559 csvtab_connect_error: 560 if( pNew ) csvtabDisconnect(&pNew->base); 561 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 562 sqlite3_free(azPValue[i]); 563 } 564 if( sRdr.zErr[0] ){ 565 sqlite3_free(*pzErr); 566 *pzErr = sqlite3_mprintf("%s", sRdr.zErr); 567 } 568 csv_reader_reset(&sRdr); 569 if( rc==SQLITE_OK ) rc = SQLITE_ERROR; 570 return rc; 571 } 572 573 /* 574 ** Reset the current row content held by a CsvCursor. 575 */ 576 static void csvtabCursorRowReset(CsvCursor *pCur){ 577 CsvTable *pTab = (CsvTable*)pCur->base.pVtab; 578 int i; 579 for(i=0; i<pTab->nCol; i++){ 580 sqlite3_free(pCur->azVal[i]); 581 pCur->azVal[i] = 0; 582 pCur->aLen[i] = 0; 583 } 584 } 585 586 /* 587 ** The xConnect and xCreate methods do the same thing, but they must be 588 ** different so that the virtual table is not an eponymous virtual table. 589 */ 590 static int csvtabCreate( 591 sqlite3 *db, 592 void *pAux, 593 int argc, const char *const*argv, 594 sqlite3_vtab **ppVtab, 595 char **pzErr 596 ){ 597 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); 598 } 599 600 /* 601 ** Destructor for a CsvCursor. 
602 */ 603 static int csvtabClose(sqlite3_vtab_cursor *cur){ 604 CsvCursor *pCur = (CsvCursor*)cur; 605 csvtabCursorRowReset(pCur); 606 csv_reader_reset(&pCur->rdr); 607 sqlite3_free(cur); 608 return SQLITE_OK; 609 } 610 611 /* 612 ** Constructor for a new CsvTable cursor object. 613 */ 614 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ 615 CsvTable *pTab = (CsvTable*)p; 616 CsvCursor *pCur; 617 size_t nByte; 618 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; 619 pCur = sqlite3_malloc( nByte ); 620 if( pCur==0 ) return SQLITE_NOMEM; 621 memset(pCur, 0, nByte); 622 pCur->azVal = (char**)&pCur[1]; 623 pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; 624 *ppCursor = &pCur->base; 625 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ 626 csv_xfer_error(pTab, &pCur->rdr); 627 return SQLITE_ERROR; 628 } 629 return SQLITE_OK; 630 } 631 632 633 /* 634 ** Advance a CsvCursor to its next row of input. 635 ** Set the EOF marker if we reach the end of input. 
636 */ 637 static int csvtabNext(sqlite3_vtab_cursor *cur){ 638 CsvCursor *pCur = (CsvCursor*)cur; 639 CsvTable *pTab = (CsvTable*)cur->pVtab; 640 int i = 0; 641 char *z; 642 do{ 643 z = csv_read_one_field(&pCur->rdr); 644 if( z==0 ){ 645 csv_xfer_error(pTab, &pCur->rdr); 646 break; 647 } 648 if( i<pTab->nCol ){ 649 if( pCur->aLen[i] < pCur->rdr.n+1 ){ 650 char *zNew = sqlite3_realloc(pCur->azVal[i], pCur->rdr.n+1); 651 if( zNew==0 ){ 652 csv_errmsg(&pCur->rdr, "out of memory"); 653 csv_xfer_error(pTab, &pCur->rdr); 654 break; 655 } 656 pCur->azVal[i] = zNew; 657 pCur->aLen[i] = pCur->rdr.n+1; 658 } 659 memcpy(pCur->azVal[i], z, pCur->rdr.n+1); 660 i++; 661 } 662 }while( pCur->rdr.cTerm==',' ); 663 while( i<pTab->nCol ){ 664 sqlite3_free(pCur->azVal[i]); 665 pCur->azVal[i] = 0; 666 pCur->aLen[i] = 0; 667 i++; 668 } 669 if( z==0 || pCur->rdr.cTerm==EOF ){ 670 pCur->iRowid = -1; 671 }else{ 672 pCur->iRowid++; 673 } 674 return SQLITE_OK; 675 } 676 677 /* 678 ** Return values of columns for the row at which the CsvCursor 679 ** is currently pointing. 680 */ 681 static int csvtabColumn( 682 sqlite3_vtab_cursor *cur, /* The cursor */ 683 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ 684 int i /* Which column to return */ 685 ){ 686 CsvCursor *pCur = (CsvCursor*)cur; 687 CsvTable *pTab = (CsvTable*)cur->pVtab; 688 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ 689 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); 690 } 691 return SQLITE_OK; 692 } 693 694 /* 695 ** Return the rowid for the current row. 696 */ 697 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ 698 CsvCursor *pCur = (CsvCursor*)cur; 699 *pRowid = pCur->iRowid; 700 return SQLITE_OK; 701 } 702 703 /* 704 ** Return TRUE if the cursor has been moved off of the last 705 ** row of output. 
706 */ 707 static int csvtabEof(sqlite3_vtab_cursor *cur){ 708 CsvCursor *pCur = (CsvCursor*)cur; 709 return pCur->iRowid<0; 710 } 711 712 /* 713 ** Only a full table scan is supported. So xFilter simply rewinds to 714 ** the beginning. 715 */ 716 static int csvtabFilter( 717 sqlite3_vtab_cursor *pVtabCursor, 718 int idxNum, const char *idxStr, 719 int argc, sqlite3_value **argv 720 ){ 721 CsvCursor *pCur = (CsvCursor*)pVtabCursor; 722 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; 723 pCur->iRowid = 0; 724 if( pCur->rdr.in==0 ){ 725 assert( pCur->rdr.zIn==pTab->zData ); 726 assert( pTab->iStart<=pCur->rdr.nIn ); 727 pCur->rdr.iIn = pTab->iStart; 728 }else{ 729 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); 730 pCur->rdr.iIn = 0; 731 pCur->rdr.nIn = 0; 732 } 733 return csvtabNext(pVtabCursor); 734 } 735 736 /* 737 ** Only a forward full table scan is supported. xBestIndex is mostly 738 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality 739 ** constraints lowers the estimated cost, which is fiction, but is useful 740 ** for testing certain kinds of virtual table behavior. 741 */ 742 static int csvtabBestIndex( 743 sqlite3_vtab *tab, 744 sqlite3_index_info *pIdxInfo 745 ){ 746 pIdxInfo->estimatedCost = 1000000; 747 #ifdef SQLITE_TEST 748 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ 749 /* The usual (and sensible) case is to always do a full table scan. 750 ** The code in this branch only runs when testflags=1. This code 751 ** generates an artifical and unrealistic plan which is useful 752 ** for testing virtual table logic but is not helpful to real applications. 753 ** 754 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual 755 ** table (even though it is not) and the cost of running the virtual table 756 ** is reduced from 1 million to just 10. 
The constraints are *not* marked 757 ** as omittable, however, so the query planner should still generate a 758 ** plan that gives a correct answer, even if they plan is not optimal. 759 */ 760 int i; 761 int nConst = 0; 762 for(i=0; i<pIdxInfo->nConstraint; i++){ 763 unsigned char op; 764 if( pIdxInfo->aConstraint[i].usable==0 ) continue; 765 op = pIdxInfo->aConstraint[i].op; 766 if( op==SQLITE_INDEX_CONSTRAINT_EQ 767 || op==SQLITE_INDEX_CONSTRAINT_LIKE 768 || op==SQLITE_INDEX_CONSTRAINT_GLOB 769 ){ 770 pIdxInfo->estimatedCost = 10; 771 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; 772 nConst++; 773 } 774 } 775 } 776 #endif 777 return SQLITE_OK; 778 } 779 780 781 static sqlite3_module CsvModule = { 782 0, /* iVersion */ 783 csvtabCreate, /* xCreate */ 784 csvtabConnect, /* xConnect */ 785 csvtabBestIndex, /* xBestIndex */ 786 csvtabDisconnect, /* xDisconnect */ 787 csvtabDisconnect, /* xDestroy */ 788 csvtabOpen, /* xOpen - open a cursor */ 789 csvtabClose, /* xClose - close a cursor */ 790 csvtabFilter, /* xFilter - configure scan constraints */ 791 csvtabNext, /* xNext - advance a cursor */ 792 csvtabEof, /* xEof - check for end of scan */ 793 csvtabColumn, /* xColumn - read data */ 794 csvtabRowid, /* xRowid - read data */ 795 0, /* xUpdate */ 796 0, /* xBegin */ 797 0, /* xSync */ 798 0, /* xCommit */ 799 0, /* xRollback */ 800 0, /* xFindMethod */ 801 0, /* xRename */ 802 }; 803 804 #ifdef SQLITE_TEST 805 /* 806 ** For virtual table testing, make a version of the CSV virtual table 807 ** available that has an xUpdate function. But the xUpdate always returns 808 ** SQLITE_READONLY since the CSV file is not really writable. 
809 */ 810 static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){ 811 return SQLITE_READONLY; 812 } 813 static sqlite3_module CsvModuleFauxWrite = { 814 0, /* iVersion */ 815 csvtabCreate, /* xCreate */ 816 csvtabConnect, /* xConnect */ 817 csvtabBestIndex, /* xBestIndex */ 818 csvtabDisconnect, /* xDisconnect */ 819 csvtabDisconnect, /* xDestroy */ 820 csvtabOpen, /* xOpen - open a cursor */ 821 csvtabClose, /* xClose - close a cursor */ 822 csvtabFilter, /* xFilter - configure scan constraints */ 823 csvtabNext, /* xNext - advance a cursor */ 824 csvtabEof, /* xEof - check for end of scan */ 825 csvtabColumn, /* xColumn - read data */ 826 csvtabRowid, /* xRowid - read data */ 827 csvtabUpdate, /* xUpdate */ 828 0, /* xBegin */ 829 0, /* xSync */ 830 0, /* xCommit */ 831 0, /* xRollback */ 832 0, /* xFindMethod */ 833 0, /* xRename */ 834 }; 835 #endif /* SQLITE_TEST */ 836 837 838 839 #ifdef _WIN32 840 __declspec(dllexport) 841 #endif 842 /* 843 ** This routine is called when the extension is loaded. The new 844 ** CSV virtual table module is registered with the calling database 845 ** connection. 846 */ 847 int sqlite3_csv_init( 848 sqlite3 *db, 849 char **pzErrMsg, 850 const sqlite3_api_routines *pApi 851 ){ 852 int rc; 853 SQLITE_EXTENSION_INIT2(pApi); 854 rc = sqlite3_create_module(db, "csv", &CsvModule, 0); 855 #ifdef SQLITE_TEST 856 if( rc==SQLITE_OK ){ 857 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); 858 } 859 #endif 860 return rc; 861 } 862