1 /* 2 ** 2016-05-28 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ****************************************************************************** 12 ** 13 ** This file contains the implementation of an SQLite virtual table for 14 ** reading CSV files. 15 ** 16 ** Usage: 17 ** 18 ** .load ./csv 19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME); 20 ** SELECT * FROM csv; 21 ** 22 ** The columns are named "c1", "c2", "c3", ... by default. But the 23 ** application can define its own CREATE TABLE statement as an additional 24 ** parameter. For example: 25 ** 26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv( 27 ** filename = "../http.log", 28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)" 29 ** ); 30 */ 31 #include <sqlite3ext.h> 32 SQLITE_EXTENSION_INIT1 33 #include <string.h> 34 #include <stdlib.h> 35 #include <assert.h> 36 #include <stdarg.h> 37 #include <ctype.h> 38 #include <stdio.h> 39 40 /* 41 ** A macro to hint to the compiler that a function should not be 42 ** inlined. 43 */ 44 #if defined(__GNUC__) 45 # define CSV_NOINLINE __attribute__((noinline)) 46 #elif defined(_MSC_VER) && _MSC_VER>=1310 47 # define CSV_NOINLINE __declspec(noinline) 48 #else 49 # define CSV_NOINLINE 50 #endif 51 52 53 /* Max size of the error message in a CsvReader */ 54 #define CSV_MXERR 200 55 56 /* Size of the CsvReader input buffer */ 57 #define CSV_INBUFSZ 1024 58 59 /* A context object used when read a CSV file. */ 60 typedef struct CsvReader CsvReader; 61 struct CsvReader { 62 FILE *in; /* Read the CSV text from this input stream */ 63 char *z; /* Accumulated text for a field */ 64 int n; /* Number of bytes in z */ 65 int nAlloc; /* Space allocated for z[] */ 66 int nLine; /* Current line number */ 67 char cTerm; /* Character that terminated the most recent field */ 68 size_t iIn; /* Next unread character in the input buffer */ 69 size_t nIn; /* Number of characters in the input buffer */ 70 char *zIn; /* The input buffer */ 71 char zErr[CSV_MXERR]; /* Error message */ 72 }; 73 74 /* Initialize a CsvReader object */ 75 static void csv_reader_init(CsvReader *p){ 76 p->in = 0; 77 p->z = 0; 78 p->n = 0; 79 p->nAlloc = 0; 80 p->nLine = 0; 81 p->nIn = 0; 82 p->zIn = 0; 83 p->zErr[0] = 0; 84 } 85 86 /* Close and reset a CsvReader object */ 87 static void csv_reader_reset(CsvReader *p){ 88 if( p->in ){ 89 fclose(p->in); 90 sqlite3_free(p->zIn); 91 } 92 sqlite3_free(p->z); 93 csv_reader_init(p); 94 } 95 96 /* Report an error on a CsvReader */ 97 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){ 98 va_list ap; 99 va_start(ap, zFormat); 100 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap); 101 va_end(ap); 102 } 103 104 /* Open the file associated with a CsvReader 105 ** Return the number of errors. 106 */ 107 static int csv_reader_open( 108 CsvReader *p, /* The reader to open */ 109 const char *zFilename, /* Read from this filename */ 110 const char *zData /* ... or use this data */ 111 ){ 112 if( zFilename ){ 113 p->zIn = sqlite3_malloc( CSV_INBUFSZ ); 114 if( p->zIn==0 ){ 115 csv_errmsg(p, "out of memory"); 116 return 1; 117 } 118 p->in = fopen(zFilename, "rb"); 119 if( p->in==0 ){ 120 csv_reader_reset(p); 121 csv_errmsg(p, "cannot open '%s' for reading", zFilename); 122 return 1; 123 } 124 }else{ 125 assert( p->in==0 ); 126 p->zIn = (char*)zData; 127 p->nIn = strlen(zData); 128 } 129 return 0; 130 } 131 132 /* The input buffer has overflowed. Refill the input buffer, then 133 ** return the next character 134 */ 135 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){ 136 size_t got; 137 138 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */ 139 assert( p->in!=0 ); /* Only called if reading froma file */ 140 141 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in); 142 if( got==0 ) return EOF; 143 p->nIn = got; 144 p->iIn = 1; 145 return p->zIn[0]; 146 } 147 148 /* Return the next character of input. Return EOF at end of input. */ 149 static int csv_getc(CsvReader *p){ 150 if( p->iIn >= p->nIn ){ 151 if( p->in!=0 ) return csv_getc_refill(p); 152 return EOF; 153 } 154 return p->zIn[p->iIn++]; 155 } 156 157 /* Increase the size of p->z and append character c to the end. 158 ** Return 0 on success and non-zero if there is an OOM error */ 159 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){ 160 char *zNew; 161 int nNew = p->nAlloc*2 + 100; 162 zNew = sqlite3_realloc64(p->z, nNew); 163 if( zNew ){ 164 p->z = zNew; 165 p->nAlloc = nNew; 166 p->z[p->n++] = c; 167 return 0; 168 }else{ 169 csv_errmsg(p, "out of memory"); 170 return 1; 171 } 172 } 173 174 /* Append a single character to the CsvReader.z[] array. 175 ** Return 0 on success and non-zero if there is an OOM error */ 176 static int csv_append(CsvReader *p, char c){ 177 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c); 178 p->z[p->n++] = c; 179 return 0; 180 } 181 182 /* Read a single field of CSV text. Compatible with rfc4180 and extended 183 ** with the option of having a separator other than ",". 184 ** 185 ** + Input comes from p->in. 186 ** + Store results in p->z of length p->n. Space to hold p->z comes 187 ** from sqlite3_malloc64(). 188 ** + Keep track of the line number in p->nLine. 189 ** + Store the character that terminates the field in p->cTerm. Store 190 ** EOF on end-of-file. 191 ** 192 ** Return "" at EOF. Return 0 on an OOM error. 193 */ 194 static char *csv_read_one_field(CsvReader *p){ 195 int c; 196 p->n = 0; 197 c = csv_getc(p); 198 if( c==EOF ){ 199 p->cTerm = EOF; 200 return ""; 201 } 202 if( c=='"' ){ 203 int pc, ppc; 204 int startLine = p->nLine; 205 int cQuote = c; 206 pc = ppc = 0; 207 while( 1 ){ 208 c = csv_getc(p); 209 if( c=='\n' ) p->nLine++; 210 if( c==cQuote ){ 211 if( pc==cQuote ){ 212 pc = 0; 213 continue; 214 } 215 } 216 if( (c==',' && pc==cQuote) 217 || (c=='\n' && pc==cQuote) 218 || (c=='\n' && pc=='\r' && ppc==cQuote) 219 || (c==EOF && pc==cQuote) 220 ){ 221 do{ p->n--; }while( p->z[p->n]!=cQuote ); 222 p->cTerm = c; 223 break; 224 } 225 if( pc==cQuote && c!='\r' ){ 226 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote); 227 break; 228 } 229 if( c==EOF ){ 230 csv_errmsg(p, "line %d: unterminated %c-quoted field\n", 231 startLine, cQuote); 232 p->cTerm = c; 233 break; 234 } 235 if( csv_append(p, (char)c) ) return 0; 236 ppc = pc; 237 pc = c; 238 } 239 }else{ 240 while( c!=EOF && c!=',' && c!='\n' ){ 241 if( csv_append(p, (char)c) ) return 0; 242 c = csv_getc(p); 243 } 244 if( c=='\n' ){ 245 p->nLine++; 246 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--; 247 } 248 p->cTerm = c; 249 } 250 if( p->z ) p->z[p->n] = 0; 251 return p->z; 252 } 253 254 255 /* Forward references to the various virtual table methods implemented 256 ** in this file. */ 257 static int csvtabCreate(sqlite3*, void*, int, const char*const*, 258 sqlite3_vtab**,char**); 259 static int csvtabConnect(sqlite3*, void*, int, const char*const*, 260 sqlite3_vtab**,char**); 261 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*); 262 static int csvtabDisconnect(sqlite3_vtab*); 263 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**); 264 static int csvtabClose(sqlite3_vtab_cursor*); 265 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr, 266 int argc, sqlite3_value **argv); 267 static int csvtabNext(sqlite3_vtab_cursor*); 268 static int csvtabEof(sqlite3_vtab_cursor*); 269 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int); 270 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*); 271 272 /* An instance of the CSV virtual table */ 273 typedef struct CsvTable { 274 sqlite3_vtab base; /* Base class. Must be first */ 275 char *zFilename; /* Name of the CSV file */ 276 char *zData; /* Raw CSV data in lieu of zFilename */ 277 long iStart; /* Offset to start of data in zFilename */ 278 int nCol; /* Number of columns in the CSV file */ 279 unsigned int tstFlags; /* Bit values used for testing */ 280 } CsvTable; 281 282 /* Allowed values for tstFlags */ 283 #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/ 284 285 /* A cursor for the CSV virtual table */ 286 typedef struct CsvCursor { 287 sqlite3_vtab_cursor base; /* Base class. Must be first */ 288 CsvReader rdr; /* The CsvReader object */ 289 char **azVal; /* Value of the current row */ 290 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */ 291 } CsvCursor; 292 293 /* Transfer error message text from a reader into a CsvTable */ 294 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){ 295 sqlite3_free(pTab->base.zErrMsg); 296 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr); 297 } 298 299 /* 300 ** This method is the destructor fo a CsvTable object. 301 */ 302 static int csvtabDisconnect(sqlite3_vtab *pVtab){ 303 CsvTable *p = (CsvTable*)pVtab; 304 sqlite3_free(p->zFilename); 305 sqlite3_free(p->zData); 306 sqlite3_free(p); 307 return SQLITE_OK; 308 } 309 310 /* Skip leading whitespace. Return a pointer to the first non-whitespace 311 ** character, or to the zero terminator if the string has only whitespace */ 312 static const char *csv_skip_whitespace(const char *z){ 313 while( isspace((unsigned char)z[0]) ) z++; 314 return z; 315 } 316 317 /* Remove trailing whitespace from the end of string z[] */ 318 static void csv_trim_whitespace(char *z){ 319 size_t n = strlen(z); 320 while( n>0 && isspace((unsigned char)z[n]) ) n--; 321 z[n] = 0; 322 } 323 324 /* Dequote the string */ 325 static void csv_dequote(char *z){ 326 int i, j; 327 char cQuote = z[0]; 328 size_t n; 329 330 if( cQuote!='\'' && cQuote!='"' ) return; 331 n = strlen(z); 332 if( n<2 || z[n-1]!=z[0] ) return; 333 for(i=1, j=0; i<n-1; i++){ 334 if( z[i]==cQuote && z[i+1]==cQuote ) i++; 335 z[j++] = z[i]; 336 } 337 z[j] = 0; 338 } 339 340 /* Check to see if the string is of the form: "TAG = VALUE" with optional 341 ** whitespace before and around tokens. If it is, return a pointer to the 342 ** first character of VALUE. If it is not, return NULL. 343 */ 344 static const char *csv_parameter(const char *zTag, int nTag, const char *z){ 345 z = csv_skip_whitespace(z); 346 if( strncmp(zTag, z, nTag)!=0 ) return 0; 347 z = csv_skip_whitespace(z+nTag); 348 if( z[0]!='=' ) return 0; 349 return csv_skip_whitespace(z+1); 350 } 351 352 /* Decode a parameter that requires a dequoted string. 353 ** 354 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned 355 ** even if there is an error. If an error occurs, then an error message 356 ** is left in p->zErr. If there are no errors, p->zErr[0]==0. 357 */ 358 static int csv_string_parameter( 359 CsvReader *p, /* Leave the error message here, if there is one */ 360 const char *zParam, /* Parameter we are checking for */ 361 const char *zArg, /* Raw text of the virtual table argment */ 362 char **pzVal /* Write the dequoted string value here */ 363 ){ 364 const char *zValue; 365 zValue = csv_parameter(zParam,strlen(zParam),zArg); 366 if( zValue==0 ) return 0; 367 p->zErr[0] = 0; 368 if( *pzVal ){ 369 csv_errmsg(p, "more than one '%s' parameter", zParam); 370 return 1; 371 } 372 *pzVal = sqlite3_mprintf("%s", zValue); 373 if( *pzVal==0 ){ 374 csv_errmsg(p, "out of memory"); 375 return 1; 376 } 377 csv_trim_whitespace(*pzVal); 378 csv_dequote(*pzVal); 379 return 1; 380 } 381 382 383 /* Return 0 if the argument is false and 1 if it is true. Return -1 if 384 ** we cannot really tell. 385 */ 386 static int csv_boolean(const char *z){ 387 if( sqlite3_stricmp("yes",z)==0 388 || sqlite3_stricmp("on",z)==0 389 || sqlite3_stricmp("true",z)==0 390 || (z[0]=='1' && z[0]==0) 391 ){ 392 return 1; 393 } 394 if( sqlite3_stricmp("no",z)==0 395 || sqlite3_stricmp("off",z)==0 396 || sqlite3_stricmp("false",z)==0 397 || (z[0]=='0' && z[1]==0) 398 ){ 399 return 0; 400 } 401 return -1; 402 } 403 404 405 /* 406 ** Parameters: 407 ** filename=FILENAME Name of file containing CSV content 408 ** data=TEXT Direct CSV content. 409 ** schema=SCHEMA Alternative CSV schema. 410 ** header=YES|NO First row of CSV defines the names of 411 ** columns if "yes". Default "no". 412 ** columns=N Assume the CSV file contains N columns. 413 ** testflags=N Bitmask of test flags. Optional 414 ** 415 ** If schema= is omitted, then the columns are named "c0", "c1", "c2", 416 ** and so forth. If columns=N is omitted, then the file is opened and 417 ** the number of columns in the first row is counted to determine the 418 ** column count. If header=YES, then the first row is skipped. 419 */ 420 static int csvtabConnect( 421 sqlite3 *db, 422 void *pAux, 423 int argc, const char *const*argv, 424 sqlite3_vtab **ppVtab, 425 char **pzErr 426 ){ 427 CsvTable *pNew = 0; /* The CsvTable object to construct */ 428 int bHeader = -1; /* header= flags. -1 means not seen yet */ 429 int rc = SQLITE_OK; /* Result code from this routine */ 430 int i, j; /* Loop counters */ 431 int tstFlags = 0; /* Value for testflags=N parameter */ 432 int nCol = -99; /* Value of the columns= parameter */ 433 CsvReader sRdr; /* A CSV file reader used to store an error 434 ** message and/or to count the number of columns */ 435 static const char *azParam[] = { 436 "filename", "data", "schema", 437 }; 438 char *azPValue[3]; /* Parameter values */ 439 # define CSV_FILENAME (azPValue[0]) 440 # define CSV_DATA (azPValue[1]) 441 # define CSV_SCHEMA (azPValue[2]) 442 443 444 assert( sizeof(azPValue)==sizeof(azParam) ); 445 memset(&sRdr, 0, sizeof(sRdr)); 446 memset(azPValue, 0, sizeof(azPValue)); 447 for(i=3; i<argc; i++){ 448 const char *z = argv[i]; 449 const char *zValue; 450 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){ 451 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break; 452 } 453 if( j<sizeof(azParam)/sizeof(azParam[0]) ){ 454 if( sRdr.zErr[0] ) goto csvtab_connect_error; 455 }else 456 if( (zValue = csv_parameter("header",6,z))!=0 ){ 457 int x; 458 if( bHeader>=0 ){ 459 csv_errmsg(&sRdr, "more than one 'header' parameter"); 460 goto csvtab_connect_error; 461 } 462 x = csv_boolean(zValue); 463 if( x==1 ){ 464 bHeader = 1; 465 }else if( x==0 ){ 466 bHeader = 0; 467 }else{ 468 csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue); 469 goto csvtab_connect_error; 470 } 471 }else 472 if( (zValue = csv_parameter("testflags",9,z))!=0 ){ 473 tstFlags = (unsigned int)atoi(zValue); 474 }else 475 if( (zValue = csv_parameter("columns",7,z))!=0 ){ 476 if( nCol>0 ){ 477 csv_errmsg(&sRdr, "more than one 'columns' parameter"); 478 goto csvtab_connect_error; 479 } 480 nCol = atoi(zValue); 481 if( nCol<=0 ){ 482 csv_errmsg(&sRdr, "must have at least one column"); 483 goto csvtab_connect_error; 484 } 485 }else 486 { 487 csv_errmsg(&sRdr, "unrecognized parameter '%s'", z); 488 goto csvtab_connect_error; 489 } 490 } 491 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){ 492 csv_errmsg(&sRdr, "must either filename= or data= but not both"); 493 goto csvtab_connect_error; 494 } 495 if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){ 496 goto csvtab_connect_error; 497 } 498 pNew = sqlite3_malloc( sizeof(*pNew) ); 499 *ppVtab = (sqlite3_vtab*)pNew; 500 if( pNew==0 ) goto csvtab_connect_oom; 501 memset(pNew, 0, sizeof(*pNew)); 502 if( nCol>0 ){ 503 pNew->nCol = nCol; 504 }else{ 505 do{ 506 const char *z = csv_read_one_field(&sRdr); 507 if( z==0 ) goto csvtab_connect_oom; 508 pNew->nCol++; 509 }while( sRdr.cTerm==',' ); 510 } 511 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0; 512 pNew->zData = CSV_DATA; CSV_DATA = 0; 513 pNew->tstFlags = tstFlags; 514 pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0; 515 csv_reader_reset(&sRdr); 516 if( CSV_SCHEMA==0 ){ 517 char *zSep = ""; 518 CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x("); 519 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom; 520 for(i=0; i<pNew->nCol; i++){ 521 CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i); 522 zSep = ","; 523 } 524 CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA); 525 } 526 rc = sqlite3_declare_vtab(db, CSV_SCHEMA); 527 if( rc ) goto csvtab_connect_error; 528 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 529 sqlite3_free(azPValue[i]); 530 } 531 return SQLITE_OK; 532 533 csvtab_connect_oom: 534 rc = SQLITE_NOMEM; 535 csv_errmsg(&sRdr, "out of memory"); 536 537 csvtab_connect_error: 538 if( pNew ) csvtabDisconnect(&pNew->base); 539 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){ 540 sqlite3_free(azPValue[i]); 541 } 542 if( sRdr.zErr[0] ){ 543 sqlite3_free(*pzErr); 544 *pzErr = sqlite3_mprintf("%s", sRdr.zErr); 545 } 546 csv_reader_reset(&sRdr); 547 if( rc==SQLITE_OK ) rc = SQLITE_ERROR; 548 return rc; 549 } 550 551 /* 552 ** Reset the current row content held by a CsvCursor. 553 */ 554 static void csvtabCursorRowReset(CsvCursor *pCur){ 555 CsvTable *pTab = (CsvTable*)pCur->base.pVtab; 556 int i; 557 for(i=0; i<pTab->nCol; i++){ 558 sqlite3_free(pCur->azVal[i]); 559 pCur->azVal[i] = 0; 560 } 561 } 562 563 /* 564 ** The xConnect and xCreate methods do the same thing, but they must be 565 ** different so that the virtual table is not an eponymous virtual table. 566 */ 567 static int csvtabCreate( 568 sqlite3 *db, 569 void *pAux, 570 int argc, const char *const*argv, 571 sqlite3_vtab **ppVtab, 572 char **pzErr 573 ){ 574 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr); 575 } 576 577 /* 578 ** Destructor for a CsvCursor. 579 */ 580 static int csvtabClose(sqlite3_vtab_cursor *cur){ 581 CsvCursor *pCur = (CsvCursor*)cur; 582 csvtabCursorRowReset(pCur); 583 csv_reader_reset(&pCur->rdr); 584 sqlite3_free(cur); 585 return SQLITE_OK; 586 } 587 588 /* 589 ** Constructor for a new CsvTable cursor object. 590 */ 591 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ 592 CsvTable *pTab = (CsvTable*)p; 593 CsvCursor *pCur; 594 pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol ); 595 if( pCur==0 ) return SQLITE_NOMEM; 596 memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol ); 597 pCur->azVal = (char**)&pCur[1]; 598 *ppCursor = &pCur->base; 599 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){ 600 csv_xfer_error(pTab, &pCur->rdr); 601 return SQLITE_ERROR; 602 } 603 return SQLITE_OK; 604 } 605 606 607 /* 608 ** Advance a CsvCursor to its next row of input. 609 ** Set the EOF marker if we reach the end of input. 610 */ 611 static int csvtabNext(sqlite3_vtab_cursor *cur){ 612 CsvCursor *pCur = (CsvCursor*)cur; 613 CsvTable *pTab = (CsvTable*)cur->pVtab; 614 int i = 0; 615 char *z; 616 csvtabCursorRowReset(pCur); 617 do{ 618 z = csv_read_one_field(&pCur->rdr); 619 if( z==0 ){ 620 csv_xfer_error(pTab, &pCur->rdr); 621 break; 622 } 623 z = sqlite3_mprintf("%s", z); 624 if( z==0 ){ 625 csv_errmsg(&pCur->rdr, "out of memory"); 626 csv_xfer_error(pTab, &pCur->rdr); 627 break; 628 } 629 if( i<pTab->nCol ){ 630 pCur->azVal[i++] = z; 631 } 632 }while( z!=0 && pCur->rdr.cTerm==',' ); 633 if( z==0 || pCur->rdr.cTerm==EOF ){ 634 pCur->iRowid = -1; 635 }else{ 636 pCur->iRowid++; 637 } 638 return SQLITE_OK; 639 } 640 641 /* 642 ** Return values of columns for the row at which the CsvCursor 643 ** is currently pointing. 644 */ 645 static int csvtabColumn( 646 sqlite3_vtab_cursor *cur, /* The cursor */ 647 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ 648 int i /* Which column to return */ 649 ){ 650 CsvCursor *pCur = (CsvCursor*)cur; 651 CsvTable *pTab = (CsvTable*)cur->pVtab; 652 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){ 653 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC); 654 } 655 return SQLITE_OK; 656 } 657 658 /* 659 ** Return the rowid for the current row. 660 */ 661 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ 662 CsvCursor *pCur = (CsvCursor*)cur; 663 *pRowid = pCur->iRowid; 664 return SQLITE_OK; 665 } 666 667 /* 668 ** Return TRUE if the cursor has been moved off of the last 669 ** row of output. 670 */ 671 static int csvtabEof(sqlite3_vtab_cursor *cur){ 672 CsvCursor *pCur = (CsvCursor*)cur; 673 return pCur->iRowid<0; 674 } 675 676 /* 677 ** Only a full table scan is supported. So xFilter simply rewinds to 678 ** the beginning. 679 */ 680 static int csvtabFilter( 681 sqlite3_vtab_cursor *pVtabCursor, 682 int idxNum, const char *idxStr, 683 int argc, sqlite3_value **argv 684 ){ 685 CsvCursor *pCur = (CsvCursor*)pVtabCursor; 686 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab; 687 pCur->iRowid = 0; 688 if( pCur->rdr.in==0 ){ 689 assert( pCur->rdr.zIn==pTab->zData ); 690 assert( pTab->iStart<=pCur->rdr.nIn ); 691 pCur->rdr.iIn = pTab->iStart; 692 }else{ 693 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET); 694 pCur->rdr.iIn = 0; 695 pCur->rdr.nIn = 0; 696 } 697 return csvtabNext(pVtabCursor); 698 } 699 700 /* 701 ** Only a forward full table scan is supported. xBestIndex is mostly 702 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality 703 ** constraints lowers the estimated cost, which is fiction, but is useful 704 ** for testing certain kinds of virtual table behavior. 705 */ 706 static int csvtabBestIndex( 707 sqlite3_vtab *tab, 708 sqlite3_index_info *pIdxInfo 709 ){ 710 CsvTable *pTab = (CsvTable*)tab; 711 int i; 712 int nConst = 0; 713 pIdxInfo->estimatedCost = 1000000; 714 if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){ 715 return SQLITE_OK; 716 } 717 /* The usual (and sensible) case is to take the "return SQLITE_OK" above. 718 ** The code below only runs when testflags=1. The code below 719 ** generates an artifical and unrealistic plan which is useful 720 ** for testing virtual table logic but is not helpfulto real applications. 721 ** 722 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual 723 ** table (even though it is not) and the cost of running the virtual table 724 ** is reduced from 1 million to just 10. The constraints are *not* marked 725 ** as omittable, however, so the query planner should still generate a 726 ** plan that gives a correct answer, even if they plan is not optimal. 727 */ 728 for(i=0; i<pIdxInfo->nConstraint; i++){ 729 unsigned char op; 730 if( pIdxInfo->aConstraint[i].usable==0 ) continue; 731 op = pIdxInfo->aConstraint[i].op; 732 if( op==SQLITE_INDEX_CONSTRAINT_EQ 733 || op==SQLITE_INDEX_CONSTRAINT_LIKE 734 || op==SQLITE_INDEX_CONSTRAINT_GLOB 735 ){ 736 pIdxInfo->estimatedCost = 10; 737 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1; 738 nConst++; 739 } 740 } 741 return SQLITE_OK; 742 } 743 744 745 static sqlite3_module CsvModule = { 746 0, /* iVersion */ 747 csvtabCreate, /* xCreate */ 748 csvtabConnect, /* xConnect */ 749 csvtabBestIndex, /* xBestIndex */ 750 csvtabDisconnect, /* xDisconnect */ 751 csvtabDisconnect, /* xDestroy */ 752 csvtabOpen, /* xOpen - open a cursor */ 753 csvtabClose, /* xClose - close a cursor */ 754 csvtabFilter, /* xFilter - configure scan constraints */ 755 csvtabNext, /* xNext - advance a cursor */ 756 csvtabEof, /* xEof - check for end of scan */ 757 csvtabColumn, /* xColumn - read data */ 758 csvtabRowid, /* xRowid - read data */ 759 0, /* xUpdate */ 760 0, /* xBegin */ 761 0, /* xSync */ 762 0, /* xCommit */ 763 0, /* xRollback */ 764 0, /* xFindMethod */ 765 0, /* xRename */ 766 }; 767 768 #ifdef _WIN32 769 __declspec(dllexport) 770 #endif 771 /* 772 ** This routine is called when the extension is loaded. The new 773 ** CSV virtual table module is registered with the calling database 774 ** connection. 775 */ 776 int sqlite3_csv_init( 777 sqlite3 *db, 778 char **pzErrMsg, 779 const sqlite3_api_routines *pApi 780 ){ 781 SQLITE_EXTENSION_INIT2(pApi); 782 return sqlite3_create_module(db, "csv", &CsvModule, 0); 783 } 784