1 /* 2 ** 2016-05-28 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the implementation of an SQLite virtual table for 14 ** reading CSV files. 15 ** 16 ** Usage: 17 ** 18 ** .load ./csv 19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); 20 ** SELECT * FROM csv; 21 ** 22 ** The columns are named "c1", "c2", "c3", ... by default. But the 23 ** application can define its own CREATE TABLE statement as an additional 24 ** parameter. For example: 25 ** 26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv( 27 ** filename = "../http.log", 28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" 29 ** ); 30 ** 31 ** Instead of specifying a file, the text of the CSV can be loaded using 32 ** the data= parameter. 33 ** 34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have 35 ** N columns. If the columns parameter is omitted, the CSV file is opened 36 ** as soon as the virtual table is constructed and the first row of the CSV 37 ** is read in order to count the tables. 38 ** 39 ** Some extra debugging features (used for testing virtual tables) are available 40 ** if this module is compiled with -DSQLITE_TEST. 41 */ 42 #include <sqlite3ext.h> 43 SQLITE_EXTENSION_INIT1 44 #include <string.h> 45 #include <stdlib.h> 46 #include <assert.h> 47 #include <stdarg.h> 48 #include <ctype.h> 49 #include <stdio.h> 50 51 #ifndef SQLITE_OMIT_VIRTUALTABLE 52 53 /* 54 ** A macro to hint to the compiler that a function should not be 55 ** inlined. 56 */ 57 #if defined(__GNUC__) 58 # define CSV_NOINLINE __attribute__((noinline)) 59 #elif defined(_MSC_VER) && _MSC_VER>=1310 60 # define CSV_NOINLINE __declspec(noinline) 61 #else 62 # define CSV_NOINLINE 63 #endif 64 65 66 /* Max size of the error message in a CsvReader */ 67 #define CSV_MXERR 200 68 69 /* Size of the CsvReader input buffer */ 70 #define CSV_INBUFSZ 1024 71 72 /* A context object used when read a CSV file. */ 73 typedef struct CsvReader CsvReader; 74 struct CsvReader { 75 FILE *in; /* Read the CSV text from this input stream */ 76 char *z; /* Accumulated text for a field */ 77 int n; /* Number of bytes in z */ 78 int nAlloc; /* Space allocated for z[] */ 79 int nLine; /* Current line number */ 80 char cTerm; /* Character that terminated the most recent field */ 81 size_t iIn; /* Next unread character in the input buffer */ 82 size_t nIn; /* Number of characters in the input buffer */ 83 char *zIn; /* The input buffer */ 84 char zErr[CSV_MXERR]; /* Error message */ 85 }; 86 87 /* Initialize a CsvReader object */ 88 static void csv_reader_init(CsvReader *p){ 89 p->in = 0; 90 p->z = 0; 91 p->n = 0; 92 p->nAlloc = 0; 93 p->nLine = 0; 94 p->nIn = 0; 95 p->zIn = 0; 96 p->zErr[0] = 0; 97 } 98 99 /* Close and reset a CsvReader object */ 100 static void csv_reader_reset(CsvReader *p){ 101 if( p->in ){ 102 fclose(p->in); 103 sqlite3_free(p->zIn); 104 } 105 sqlite3_free(p->z); 106 csv_reader_init(p); 107 } 108 109 /* Report an error on a CsvReader */ 110 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ 111 va_list ap; 112 va_start(ap, zFormat); 113 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); 114 va_end(ap); 115 } 116 117 /* Open the file associated with a CsvReader 118 ** Return the number of errors. 119 */ 120 static int csv_reader_open( 121 CsvReader *p, /* The reader to open */ 122 const char *zFilename, /* Read from this filename */ 123 const char *zData /* ... or use this data */ 124 ){ 125 if( zFilename ){ 126 p->zIn = sqlite3_malloc( CSV_INBUFSZ ); 127 if( p->zIn==0 ){ 128 csv_errmsg(p, "out of memory"); 129 return 1; 130 } 131 p->in = fopen(zFilename, "rb"); 132 if( p->in==0 ){ 133 csv_reader_reset(p); 134 csv_errmsg(p, "cannot open '%s' for reading", zFilename); 135 return 1; 136 } 137 }else{ 138 assert( p->in==0 ); 139 p->zIn = (char*)zData; 140 p->nIn = strlen(zData); 141 } 142 return 0; 143 } 144 145 /* The input buffer has overflowed. Refill the input buffer, then 146 ** return the next character 147 */ 148 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ 149 size_t got; 150 151 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ 152 assert( p->in!=0 ); /* Only called if reading froma file */ 153 154 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); 155 if( got==0 ) return EOF; 156 p->nIn = got; 157 p->iIn = 1; 158 return p->zIn[0]; 159 } 160 161 /* Return the next character of input. Return EOF at end of input. */ 162 static int csv_getc(CsvReader *p){ 163 if( p->iIn >= p->nIn ){ 164 if( p->in!=0 ) return csv_getc_refill(p); 165 return EOF; 166 } 167 return p->zIn[p->iIn++]; 168 } 169 170 /* Increase the size of p->z and append character c to the end. 171 ** Return 0 on success and non-zero if there is an OOM error */ 172 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ 173 char *zNew; 174 int nNew = p->nAlloc*2 + 100; 175 zNew = sqlite3_realloc64(p->z, nNew); 176 if( zNew ){ 177 p->z = zNew; 178 p->nAlloc = nNew; 179 p->z[p->n++] = c; 180 return 0; 181 }else{ 182 csv_errmsg(p, "out of memory"); 183 return 1; 184 } 185 } 186 187 /* Append a single character to the CsvReader.z[] array. 188 ** Return 0 on success and non-zero if there is an OOM error */ 189 static int csv_append(CsvReader *p, char c){ 190 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); 191 p->z[p->n++] = c; 192 return 0; 193 } 194 195 /* Read a single field of CSV text. Compatible with rfc4180 and extended 196 ** with the option of having a separator other than ",". 197 ** 198 ** + Input comes from p->in. 199 ** + Store results in p->z of length p->n. Space to hold p->z comes 200 ** from sqlite3_malloc64(). 201 ** + Keep track of the line number in p->nLine. 202 ** + Store the character that terminates the field in p->cTerm. Store 203 ** EOF on end-of-file. 204 ** 205 ** Return "" at EOF. Return 0 on an OOM error. 206 */ 207 static char *csv_read_one_field(CsvReader *p){ 208 int c; 209 p->n = 0; 210 c = csv_getc(p); 211 if( c==EOF ){ 212 p->cTerm = EOF; 213 return ""; 214 } 215 if( c=='"' ){ 216 int pc, ppc; 217 int startLine = p->nLine; 218 pc = ppc = 0; 219 while( 1 ){ 220 c = csv_getc(p); 221 if( c<='"' || pc=='"' ){ 222 if( c=='\n' ) p->nLine++; 223 if( c=='"' ){ 224 if( pc=='"' ){ 225 pc = 0; 226 continue; 227 } 228 } 229 if( (c==',' && pc=='"') 230 || (c=='\n' && pc=='"') 231 || (c=='\n' && pc=='\r' && ppc=='"') 232 || (c==EOF && pc=='"') 233 ){ 234 do{ p->n--; }while( p->z[p->n]!='"' ); 235 p->cTerm = (char)c; 236 break; 237 } 238 if( pc=='"' && c!='\r' ){ 239 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"'); 240 break; 241 } 242 if( c==EOF ){ 243 csv_errmsg(p, "line %d: unterminated %c-quoted field\n", 244 startLine, '"'); 245 p->cTerm = (char)c; 246 break; 247 } 248 } 249 if( csv_append(p, (char)c) ) return 0; 250 ppc = pc; 251 pc = c; 252 } 253 }else{ 254 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){ 255 if( csv_append(p, (char)c) ) return 0; 256 c = csv_getc(p); 257 } 258 if( c=='\n' ){ 259 p->nLine++; 260 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; 261 } 262 p->cTerm = (char)c; 263 } 264 if( p->z ) p->z[p->n] = 0; 265 return p->z; 266 } 267 268 269 /* Forward references to the various virtual table methods implemented 270 ** in this file. */ 271 static int csvtabCreate(sqlite3*, void*, int, const char*const*, 272 sqlite3_vtab**,char**); 273 static int csvtabConnect(sqlite3*, void*, int, const char*const*, 274 sqlite3_vtab**,char**); 275 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); 276 static int csvtabDisconnect(sqlite3_vtab*); 277 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); 278 static int csvtabClose(sqlite3_vtab_cursor*); 279 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, 280 int argc, sqlite3_value **argv); 281 static int csvtabNext(sqlite3_vtab_cursor*); 282 static int csvtabEof(sqlite3_vtab_cursor*); 283 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); 284 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); 285 286 /* An instance of the CSV virtual table */ 287 typedef struct CsvTable { 288 sqlite3_vtab base; /* Base class. Must be first */ 289 char *zFilename; /* Name of the CSV file */ 290 char *zData; /* Raw CSV data in lieu of zFilename */ 291 long iStart; /* Offset to start of data in zFilename */ 292 int nCol; /* Number of columns in the CSV file */ 293 unsigned int tstFlags; /* Bit values used for testing */ 294 } CsvTable; 295 296 /* Allowed values for tstFlags */ 297 #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ 298 299 /* A cursor for the CSV virtual table */ 300 typedef struct CsvCursor { 301 sqlite3_vtab_cursor base; /* Base class. Must be first */ 302 CsvReader rdr; /* The CsvReader object */ 303 char **azVal; /* Value of the current row */ 304 int *aLen; /* Length of each entry */ 305 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ 306 } CsvCursor; 307 308 /* Transfer error message text from a reader into a CsvTable */ 309 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ 310 sqlite3_free(pTab->base.zErrMsg); 311 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); 312 } 313 314 /* 315 ** This method is the destructor fo a CsvTable object. 316 */ 317 static int csvtabDisconnect(sqlite3_vtab *pVtab){ 318 CsvTable *p = (CsvTable*)pVtab; 319 sqlite3_free(p->zFilename); 320 sqlite3_free(p->zData); 321 sqlite3_free(p); 322 return SQLITE_OK; 323 } 324 325 /* Skip leading whitespace. Return a pointer to the first non-whitespace 326 ** character, or to the zero terminator if the string has only whitespace */ 327 static const char *csv_skip_whitespace(const char *z){ 328 while( isspace((unsigned char)z[0]) ) z++; 329 return z; 330 } 331 332 /* Remove trailing whitespace from the end of string z[] */ 333 static void csv_trim_whitespace(char *z){ 334 size_t n = strlen(z); 335 while( n>0 && isspace((unsigned char)z[n]) ) n--; 336 z[n] = 0; 337 } 338 339 /* Dequote the string */ 340 static void csv_dequote(char *z){ 341 int j; 342 char cQuote = z[0]; 343 size_t i, n; 344 345 if( cQuote!='\'' && cQuote!='"' ) return; 346 n = strlen(z); 347 if( n<2 || z[n-1]!=z[0] ) return; 348 for(i=1, j=0; i<n-1; i++){ 349 if( z[i]==cQuote && z[i+1]==cQuote ) i++; 350 z[j++] = z[i]; 351 } 352 z[j] = 0; 353 } 354 355 /* Check to see if the string is of the form: "TAG = VALUE" with optional 356 ** whitespace before and around tokens. If it is, return a pointer to the 357 ** first character of VALUE. If it is not, return NULL. 358 */ 359 static const char *csv_parameter(const char *zTag, int nTag, const char *z){ 360 z = csv_skip_whitespace(z); 361 if( strncmp(zTag, z, nTag)!=0 ) return 0; 362 z = csv_skip_whitespace(z+nTag); 363 if( z[0]!='=' ) return 0; 364 return csv_skip_whitespace(z+1); 365 } 366 367 /* Decode a parameter that requires a dequoted string. 368 ** 369 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned 370 ** even if there is an error. If an error occurs, then an error message 371 ** is left in p->zErr. If there are no errors, p->zErr[0]==0. 372 */ 373 static int csv_string_parameter( 374 CsvReader *p, /* Leave the error message here, if there is one */ 375 const char *zParam, /* Parameter we are checking for */ 376 const char *zArg, /* Raw text of the virtual table argment */ 377 char **pzVal /* Write the dequoted string value here */ 378 ){ 379 const char *zValue; 380 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg); 381 if( zValue==0 ) return 0; 382 p->zErr[0] = 0; 383 if( *pzVal ){ 384 csv_errmsg(p, "more than one '%s' parameter", zParam); 385 return 1; 386 } 387 *pzVal = sqlite3_mprintf("%s", zValue); 388 if( *pzVal==0 ){ 389 csv_errmsg(p, "out of memory"); 390 return 1; 391 } 392 csv_trim_whitespace(*pzVal); 393 csv_dequote(*pzVal); 394 return 1; 395 } 396 397 398 /* Return 0 if the argument is false and 1 if it is true. Return -1 if 399 ** we cannot really tell. 400 */ 401 static int csv_boolean(const char *z){ 402 if( sqlite3_stricmp("yes",z)==0 403 || sqlite3_stricmp("on",z)==0 404 || sqlite3_stricmp("true",z)==0 405 || (z[0]=='1' && z[0]==0) 406 ){ 407 return 1; 408 } 409 if( sqlite3_stricmp("no",z)==0 410 || sqlite3_stricmp("off",z)==0 411 || sqlite3_stricmp("false",z)==0 412 || (z[0]=='0' && z[1]==0) 413 ){ 414 return 0; 415 } 416 return -1; 417 } 418 419 420 /* 421 ** Parameters: 422 ** filename=FILENAME Name of file containing CSV content 423 ** data=TEXT Direct CSV content. 424 ** schema=SCHEMA Alternative CSV schema. 425 ** header=YES|NO First row of CSV defines the names of 426 ** columns if "yes". Default "no". 427 ** columns=N Assume the CSV file contains N columns. 428 ** 429 ** Only available if compiled with SQLITE_TEST: 430 ** 431 ** testflags=N Bitmask of test flags. Optional 432 ** 433 ** If schema= is omitted, then the columns are named "c0", "c1", "c2", 434 ** and so forth. If columns=N is omitted, then the file is opened and 435 ** the number of columns in the first row is counted to determine the 436 ** column count. If header=YES, then the first row is skipped. 437 */ 438 static int csvtabConnect( 439 sqlite3 *db, 440 void *pAux, 441 int argc, const char *const*argv, 442 sqlite3_vtab **ppVtab, 443 char **pzErr 444 ){ 445 CsvTable *pNew = 0; /* The CsvTable object to construct */ 446 int bHeader = -1; /* header= flags. -1 means not seen yet */ 447 int rc = SQLITE_OK; /* Result code from this routine */ 448 int i, j; /* Loop counters */ 449 #ifdef SQLITE_TEST 450 int tstFlags = 0; /* Value for testflags=N parameter */ 451 #endif 452 int nCol = -99; /* Value of the columns= parameter */ 453 CsvReader sRdr; /* A CSV file reader used to store an error 454 ** message and/or to count the number of columns */ 455 static const char *azParam[] = { 456 "filename", "data", "schema", 457 }; 458 char *azPValue[3]; /* Parameter values */ 459 # define CSV_FILENAME (azPValue[0]) 460 # define CSV_DATA (azPValue[1]) 461 # define CSV_SCHEMA (azPValue[2]) 462 463 464 assert( sizeof(azPValue)==sizeof(azParam) ); 465 memset(&sRdr, 0, sizeof(sRdr)); 466 memset(azPValue, 0, sizeof(azPValue)); 467 for(i=3; i<argc; i++){ 468 const char *z = argv[i]; 469 const char *zValue; 470 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ 471 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; 472 } 473 if( j<sizeof(azParam)/sizeof(azParam[0]) ){ 474 if( sRdr.zErr[0] ) goto csvtab_connect_error; 475 }else 476 if( (zValue = csv_parameter("header",6,z))!=0 ){ 477 int x; 478 if( bHeader>=0 ){ 479 csv_errmsg(&sRdr, "more than one 'header' parameter"); 480 goto csvtab_connect_error; 481 } 482 x = csv_boolean(zValue); 483 if( x==1 ){ 484 bHeader = 1; 485 }else if( x==0 ){ 486 bHeader = 0; 487 }else{ 488 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); 489 goto csvtab_connect_error; 490 } 491 }else 492 #ifdef SQLITE_TEST 493 if( (zValue = csv_parameter("testflags",9,z))!=0 ){ 494 tstFlags = (unsigned int)atoi(zValue); 495 }else 496 #endif 497 if( (zValue = csv_parameter("columns",7,z))!=0 ){ 498 if( nCol>0 ){ 499 csv_errmsg(&sRdr, "more than one 'columns' parameter"); 500 goto csvtab_connect_error; 501 } 502 nCol = atoi(zValue); 503 if( nCol<=0 ){ 504 csv_errmsg(&sRdr, "must have at least one column"); 505 goto csvtab_connect_error; 506 } 507 }else 508 { 509 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); 510 goto csvtab_connect_error; 511 } 512 } 513 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ 514 csv_errmsg(&sRdr, "must either filename= or data= but not both"); 515 goto csvtab_connect_error; 516 } 517 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ 518 goto csvtab_connect_error; 519 } 520 pNew = sqlite3_malloc( sizeof(*pNew) ); 521 *ppVtab = (sqlite3_vtab*)pNew; 522 if( pNew==0 ) goto csvtab_connect_oom; 523 memset(pNew, 0, sizeof(*pNew)); 524 if( nCol>0 ){ 525 pNew->nCol = nCol; 526 }else{ 527 do{ 528 const char *z = csv_read_one_field(&sRdr); 529 if( z==0 ) goto csvtab_connect_oom; 530 pNew->nCol++; 531 }while( sRdr.cTerm==',' ); 532 } 533 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; 534 pNew->zData = CSV_DATA; CSV_DATA = 0; 535 #ifdef SQLITE_TEST 536 pNew->tstFlags = tstFlags; 537 #endif 538 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; 539 csv_reader_reset(&sRdr); 540 if( CSV_SCHEMA==0 ){ 541 char *zSep = ""; 542 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); 543 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; 544 for(i=0; i<pNew->nCol; i++){ 545 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); 546 zSep = ","; 547 } 548 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); 549 } 550 rc = sqlite3_declare_vtab(db, CSV_SCHEMA); 551 if( rc ) goto csvtab_connect_error; 552 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 553 sqlite3_free(azPValue[i]); 554 } 555 return SQLITE_OK; 556 557 csvtab_connect_oom: 558 rc = SQLITE_NOMEM; 559 csv_errmsg(&sRdr, "out of memory"); 560 561 csvtab_connect_error: 562 if( pNew ) csvtabDisconnect(&pNew->base); 563 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 564 sqlite3_free(azPValue[i]); 565 } 566 if( sRdr.zErr[0] ){ 567 sqlite3_free(*pzErr); 568 *pzErr = sqlite3_mprintf("%s", sRdr.zErr); 569 } 570 csv_reader_reset(&sRdr); 571 if( rc==SQLITE_OK ) rc = SQLITE_ERROR; 572 return rc; 573 } 574 575 /* 576 ** Reset the current row content held by a CsvCursor. 577 */ 578 static void csvtabCursorRowReset(CsvCursor *pCur){ 579 CsvTable *pTab = (CsvTable*)pCur->base.pVtab; 580 int i; 581 for(i=0; i<pTab->nCol; i++){ 582 sqlite3_free(pCur->azVal[i]); 583 pCur->azVal[i] = 0; 584 pCur->aLen[i] = 0; 585 } 586 } 587 588 /* 589 ** The xConnect and xCreate methods do the same thing, but they must be 590 ** different so that the virtual table is not an eponymous virtual table. 591 */ 592 static int csvtabCreate( 593 sqlite3 *db, 594 void *pAux, 595 int argc, const char *const*argv, 596 sqlite3_vtab **ppVtab, 597 char **pzErr 598 ){ 599 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); 600 } 601 602 /* 603 ** Destructor for a CsvCursor. 604 */ 605 static int csvtabClose(sqlite3_vtab_cursor *cur){ 606 CsvCursor *pCur = (CsvCursor*)cur; 607 csvtabCursorRowReset(pCur); 608 csv_reader_reset(&pCur->rdr); 609 sqlite3_free(cur); 610 return SQLITE_OK; 611 } 612 613 /* 614 ** Constructor for a new CsvTable cursor object. 615 */ 616 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ 617 CsvTable *pTab = (CsvTable*)p; 618 CsvCursor *pCur; 619 size_t nByte; 620 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol; 621 pCur = sqlite3_malloc64( nByte ); 622 if( pCur==0 ) return SQLITE_NOMEM; 623 memset(pCur, 0, nByte); 624 pCur->azVal = (char**)&pCur[1]; 625 pCur->aLen = (int*)&pCur->azVal[pTab->nCol]; 626 *ppCursor = &pCur->base; 627 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ 628 csv_xfer_error(pTab, &pCur->rdr); 629 return SQLITE_ERROR; 630 } 631 return SQLITE_OK; 632 } 633 634 635 /* 636 ** Advance a CsvCursor to its next row of input. 637 ** Set the EOF marker if we reach the end of input. 638 */ 639 static int csvtabNext(sqlite3_vtab_cursor *cur){ 640 CsvCursor *pCur = (CsvCursor*)cur; 641 CsvTable *pTab = (CsvTable*)cur->pVtab; 642 int i = 0; 643 char *z; 644 do{ 645 z = csv_read_one_field(&pCur->rdr); 646 if( z==0 ){ 647 csv_xfer_error(pTab, &pCur->rdr); 648 break; 649 } 650 if( i<pTab->nCol ){ 651 if( pCur->aLen[i] < pCur->rdr.n+1 ){ 652 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1); 653 if( zNew==0 ){ 654 csv_errmsg(&pCur->rdr, "out of memory"); 655 csv_xfer_error(pTab, &pCur->rdr); 656 break; 657 } 658 pCur->azVal[i] = zNew; 659 pCur->aLen[i] = pCur->rdr.n+1; 660 } 661 memcpy(pCur->azVal[i], z, pCur->rdr.n+1); 662 i++; 663 } 664 }while( pCur->rdr.cTerm==',' ); 665 while( i<pTab->nCol ){ 666 sqlite3_free(pCur->azVal[i]); 667 pCur->azVal[i] = 0; 668 pCur->aLen[i] = 0; 669 i++; 670 } 671 if( z==0 || pCur->rdr.cTerm==EOF ){ 672 pCur->iRowid = -1; 673 }else{ 674 pCur->iRowid++; 675 } 676 return SQLITE_OK; 677 } 678 679 /* 680 ** Return values of columns for the row at which the CsvCursor 681 ** is currently pointing. 682 */ 683 static int csvtabColumn( 684 sqlite3_vtab_cursor *cur, /* The cursor */ 685 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ 686 int i /* Which column to return */ 687 ){ 688 CsvCursor *pCur = (CsvCursor*)cur; 689 CsvTable *pTab = (CsvTable*)cur->pVtab; 690 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ 691 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); 692 } 693 return SQLITE_OK; 694 } 695 696 /* 697 ** Return the rowid for the current row. 698 */ 699 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ 700 CsvCursor *pCur = (CsvCursor*)cur; 701 *pRowid = pCur->iRowid; 702 return SQLITE_OK; 703 } 704 705 /* 706 ** Return TRUE if the cursor has been moved off of the last 707 ** row of output. 708 */ 709 static int csvtabEof(sqlite3_vtab_cursor *cur){ 710 CsvCursor *pCur = (CsvCursor*)cur; 711 return pCur->iRowid<0; 712 } 713 714 /* 715 ** Only a full table scan is supported. So xFilter simply rewinds to 716 ** the beginning. 717 */ 718 static int csvtabFilter( 719 sqlite3_vtab_cursor *pVtabCursor, 720 int idxNum, const char *idxStr, 721 int argc, sqlite3_value **argv 722 ){ 723 CsvCursor *pCur = (CsvCursor*)pVtabCursor; 724 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; 725 pCur->iRowid = 0; 726 if( pCur->rdr.in==0 ){ 727 assert( pCur->rdr.zIn==pTab->zData ); 728 assert( pTab->iStart>=0 ); 729 assert( (size_t)pTab->iStart<=pCur->rdr.nIn ); 730 pCur->rdr.iIn = pTab->iStart; 731 }else{ 732 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); 733 pCur->rdr.iIn = 0; 734 pCur->rdr.nIn = 0; 735 } 736 return csvtabNext(pVtabCursor); 737 } 738 739 /* 740 ** Only a forward full table scan is supported. xBestIndex is mostly 741 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality 742 ** constraints lowers the estimated cost, which is fiction, but is useful 743 ** for testing certain kinds of virtual table behavior. 744 */ 745 static int csvtabBestIndex( 746 sqlite3_vtab *tab, 747 sqlite3_index_info *pIdxInfo 748 ){ 749 pIdxInfo->estimatedCost = 1000000; 750 #ifdef SQLITE_TEST 751 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){ 752 /* The usual (and sensible) case is to always do a full table scan. 753 ** The code in this branch only runs when testflags=1. This code 754 ** generates an artifical and unrealistic plan which is useful 755 ** for testing virtual table logic but is not helpful to real applications. 756 ** 757 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual 758 ** table (even though it is not) and the cost of running the virtual table 759 ** is reduced from 1 million to just 10. The constraints are *not* marked 760 ** as omittable, however, so the query planner should still generate a 761 ** plan that gives a correct answer, even if they plan is not optimal. 762 */ 763 int i; 764 int nConst = 0; 765 for(i=0; i<pIdxInfo->nConstraint; i++){ 766 unsigned char op; 767 if( pIdxInfo->aConstraint[i].usable==0 ) continue; 768 op = pIdxInfo->aConstraint[i].op; 769 if( op==SQLITE_INDEX_CONSTRAINT_EQ 770 || op==SQLITE_INDEX_CONSTRAINT_LIKE 771 || op==SQLITE_INDEX_CONSTRAINT_GLOB 772 ){ 773 pIdxInfo->estimatedCost = 10; 774 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; 775 nConst++; 776 } 777 } 778 } 779 #endif 780 return SQLITE_OK; 781 } 782 783 784 static sqlite3_module CsvModule = { 785 0, /* iVersion */ 786 csvtabCreate, /* xCreate */ 787 csvtabConnect, /* xConnect */ 788 csvtabBestIndex, /* xBestIndex */ 789 csvtabDisconnect, /* xDisconnect */ 790 csvtabDisconnect, /* xDestroy */ 791 csvtabOpen, /* xOpen - open a cursor */ 792 csvtabClose, /* xClose - close a cursor */ 793 csvtabFilter, /* xFilter - configure scan constraints */ 794 csvtabNext, /* xNext - advance a cursor */ 795 csvtabEof, /* xEof - check for end of scan */ 796 csvtabColumn, /* xColumn - read data */ 797 csvtabRowid, /* xRowid - read data */ 798 0, /* xUpdate */ 799 0, /* xBegin */ 800 0, /* xSync */ 801 0, /* xCommit */ 802 0, /* xRollback */ 803 0, /* xFindMethod */ 804 0, /* xRename */ 805 }; 806 807 #ifdef SQLITE_TEST 808 /* 809 ** For virtual table testing, make a version of the CSV virtual table 810 ** available that has an xUpdate function. But the xUpdate always returns 811 ** SQLITE_READONLY since the CSV file is not really writable. 812 */ 813 static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){ 814 return SQLITE_READONLY; 815 } 816 static sqlite3_module CsvModuleFauxWrite = { 817 0, /* iVersion */ 818 csvtabCreate, /* xCreate */ 819 csvtabConnect, /* xConnect */ 820 csvtabBestIndex, /* xBestIndex */ 821 csvtabDisconnect, /* xDisconnect */ 822 csvtabDisconnect, /* xDestroy */ 823 csvtabOpen, /* xOpen - open a cursor */ 824 csvtabClose, /* xClose - close a cursor */ 825 csvtabFilter, /* xFilter - configure scan constraints */ 826 csvtabNext, /* xNext - advance a cursor */ 827 csvtabEof, /* xEof - check for end of scan */ 828 csvtabColumn, /* xColumn - read data */ 829 csvtabRowid, /* xRowid - read data */ 830 csvtabUpdate, /* xUpdate */ 831 0, /* xBegin */ 832 0, /* xSync */ 833 0, /* xCommit */ 834 0, /* xRollback */ 835 0, /* xFindMethod */ 836 0, /* xRename */ 837 }; 838 #endif /* SQLITE_TEST */ 839 840 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ 841 842 843 #ifdef _WIN32 844 __declspec(dllexport) 845 #endif 846 /* 847 ** This routine is called when the extension is loaded. The new 848 ** CSV virtual table module is registered with the calling database 849 ** connection. 850 */ 851 int sqlite3_csv_init( 852 sqlite3 *db, 853 char **pzErrMsg, 854 const sqlite3_api_routines *pApi 855 ){ 856 #ifndef SQLITE_OMIT_VIRTUALTABLE 857 int rc; 858 SQLITE_EXTENSION_INIT2(pApi); 859 rc = sqlite3_create_module(db, "csv", &CsvModule, 0); 860 #ifdef SQLITE_TEST 861 if( rc==SQLITE_OK ){ 862 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0); 863 } 864 #endif 865 return rc; 866 #else 867 return SQLITE_OK; 868 #endif 869 } 870