xref: /sqlite-3.40.0/ext/misc/csv.c (revision cb6acda9)
1 /*
2 ** 2016-05-28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains the implementation of an SQLite virtual table for
14 ** reading CSV files.
15 **
16 ** Usage:
17 **
18 **    .load ./csv
19 **    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20 **    SELECT * FROM csv;
21 **
22 ** The columns are named "c0", "c1", "c2", ... by default.  But the
23 ** application can define its own CREATE TABLE statement as an additional
24 ** parameter.  For example:
25 **
26 **    CREATE VIRTUAL TABLE temp.csv2 USING csv(
27 **       filename = "../http.log",
28 **       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29 **    );
30 **
31 ** Instead of specifying a file, the text of the CSV can be loaded using
32 ** the data= parameter.
33 **
34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have
35 ** N columns.  If the columns parameter is omitted, the CSV file is opened
36 ** as soon as the virtual table is constructed and the first row of the CSV
37 ** is read in order to count the columns.
38 **
39 ** Some extra debugging features (used for testing virtual tables) are available
40 ** if this module is compiled with -DSQLITE_TEST.
41 */
42 #include <sqlite3ext.h>
43 SQLITE_EXTENSION_INIT1
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <stdarg.h>
48 #include <ctype.h>
49 #include <stdio.h>
50 
51 #ifndef SQLITE_OMIT_VIRTUALTABLE
52 
/*
** CSV_NOINLINE asks the compiler to keep a function out of line.  It
** expands to the GCC attribute, to the MSVC declspec (for _MSC_VER 1310
** and later), and to nothing on every other compiler.
*/
#ifdef __GNUC__
#  define CSV_NOINLINE  __attribute__((noinline))
#elif defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
#else
#  define CSV_NOINLINE
#endif

/* Capacity in bytes of the CsvReader.zErr[] error message buffer */
#define CSV_MXERR 200

/* Number of bytes in the buffer used to read a CSV file */
#define CSV_INBUFSZ 1024
71 
72 /* A context object used when read a CSV file. */
73 typedef struct CsvReader CsvReader;
74 struct CsvReader {
75   FILE *in;              /* Read the CSV text from this input stream */
76   char *z;               /* Accumulated text for a field */
77   int n;                 /* Number of bytes in z */
78   int nAlloc;            /* Space allocated for z[] */
79   int nLine;             /* Current line number */
80   int bNotFirst;         /* True if prior text has been seen */
81   int cTerm;             /* Character that terminated the most recent field */
82   size_t iIn;            /* Next unread character in the input buffer */
83   size_t nIn;            /* Number of characters in the input buffer */
84   char *zIn;             /* The input buffer */
85   char zErr[CSV_MXERR];  /* Error message */
86 };
87 
88 /* Initialize a CsvReader object */
89 static void csv_reader_init(CsvReader *p){
90   p->in = 0;
91   p->z = 0;
92   p->n = 0;
93   p->nAlloc = 0;
94   p->nLine = 0;
95   p->bNotFirst = 0;
96   p->nIn = 0;
97   p->zIn = 0;
98   p->zErr[0] = 0;
99 }
100 
101 /* Close and reset a CsvReader object */
102 static void csv_reader_reset(CsvReader *p){
103   if( p->in ){
104     fclose(p->in);
105     sqlite3_free(p->zIn);
106   }
107   sqlite3_free(p->z);
108   csv_reader_init(p);
109 }
110 
111 /* Report an error on a CsvReader */
112 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
113   va_list ap;
114   va_start(ap, zFormat);
115   sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
116   va_end(ap);
117 }
118 
119 /* Open the file associated with a CsvReader
120 ** Return the number of errors.
121 */
122 static int csv_reader_open(
123   CsvReader *p,               /* The reader to open */
124   const char *zFilename,      /* Read from this filename */
125   const char *zData           /*  ... or use this data */
126 ){
127   if( zFilename ){
128     p->zIn = sqlite3_malloc( CSV_INBUFSZ );
129     if( p->zIn==0 ){
130       csv_errmsg(p, "out of memory");
131       return 1;
132     }
133     p->in = fopen(zFilename, "rb");
134     if( p->in==0 ){
135       csv_reader_reset(p);
136       csv_errmsg(p, "cannot open '%s' for reading", zFilename);
137       return 1;
138     }
139   }else{
140     assert( p->in==0 );
141     p->zIn = (char*)zData;
142     p->nIn = strlen(zData);
143   }
144   return 0;
145 }
146 
147 /* The input buffer has overflowed.  Refill the input buffer, then
148 ** return the next character
149 */
150 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
151   size_t got;
152 
153   assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
154   assert( p->in!=0 );        /* Only called if reading froma file */
155 
156   got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
157   if( got==0 ) return EOF;
158   p->nIn = got;
159   p->iIn = 1;
160   return p->zIn[0];
161 }
162 
163 /* Return the next character of input.  Return EOF at end of input. */
164 static int csv_getc(CsvReader *p){
165   if( p->iIn >= p->nIn ){
166     if( p->in!=0 ) return csv_getc_refill(p);
167     return EOF;
168   }
169   return ((unsigned char*)p->zIn)[p->iIn++];
170 }
171 
172 /* Increase the size of p->z and append character c to the end.
173 ** Return 0 on success and non-zero if there is an OOM error */
174 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
175   char *zNew;
176   int nNew = p->nAlloc*2 + 100;
177   zNew = sqlite3_realloc64(p->z, nNew);
178   if( zNew ){
179     p->z = zNew;
180     p->nAlloc = nNew;
181     p->z[p->n++] = c;
182     return 0;
183   }else{
184     csv_errmsg(p, "out of memory");
185     return 1;
186   }
187 }
188 
189 /* Append a single character to the CsvReader.z[] array.
190 ** Return 0 on success and non-zero if there is an OOM error */
191 static int csv_append(CsvReader *p, char c){
192   if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
193   p->z[p->n++] = c;
194   return 0;
195 }
196 
197 /* Read a single field of CSV text.  Compatible with rfc4180 and extended
198 ** with the option of having a separator other than ",".
199 **
200 **   +  Input comes from p->in.
201 **   +  Store results in p->z of length p->n.  Space to hold p->z comes
202 **      from sqlite3_malloc64().
203 **   +  Keep track of the line number in p->nLine.
204 **   +  Store the character that terminates the field in p->cTerm.  Store
205 **      EOF on end-of-file.
206 **
207 ** Return "" at EOF.  Return 0 on an OOM error.
208 */
209 static char *csv_read_one_field(CsvReader *p){
210   int c;
211   p->n = 0;
212   c = csv_getc(p);
213   if( c==EOF ){
214     p->cTerm = EOF;
215     return "";
216   }
217   if( c=='"' ){
218     int pc, ppc;
219     int startLine = p->nLine;
220     pc = ppc = 0;
221     while( 1 ){
222       c = csv_getc(p);
223       if( c<='"' || pc=='"' ){
224         if( c=='\n' ) p->nLine++;
225         if( c=='"' ){
226           if( pc=='"' ){
227             pc = 0;
228             continue;
229           }
230         }
231         if( (c==',' && pc=='"')
232          || (c=='\n' && pc=='"')
233          || (c=='\n' && pc=='\r' && ppc=='"')
234          || (c==EOF && pc=='"')
235         ){
236           do{ p->n--; }while( p->z[p->n]!='"' );
237           p->cTerm = (char)c;
238           break;
239         }
240         if( pc=='"' && c!='\r' ){
241           csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
242           break;
243         }
244         if( c==EOF ){
245           csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
246                      startLine, '"');
247           p->cTerm = (char)c;
248           break;
249         }
250       }
251       if( csv_append(p, (char)c) ) return 0;
252       ppc = pc;
253       pc = c;
254     }
255   }else{
256     /* If this is the first field being parsed and it begins with the
257     ** UTF-8 BOM  (0xEF BB BF) then skip the BOM */
258     if( (c&0xff)==0xef && p->bNotFirst==0 ){
259       csv_append(p, (char)c);
260       c = csv_getc(p);
261       if( (c&0xff)==0xbb ){
262         csv_append(p, (char)c);
263         c = csv_getc(p);
264         if( (c&0xff)==0xbf ){
265           p->bNotFirst = 1;
266           p->n = 0;
267           return csv_read_one_field(p);
268         }
269       }
270     }
271     while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
272       if( csv_append(p, (char)c) ) return 0;
273       c = csv_getc(p);
274     }
275     if( c=='\n' ){
276       p->nLine++;
277       if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
278     }
279     p->cTerm = (char)c;
280   }
281   if( p->z ) p->z[p->n] = 0;
282   p->bNotFirst = 1;
283   return p->z;
284 }
285 
286 
287 /* Forward references to the various virtual table methods implemented
288 ** in this file. */
289 static int csvtabCreate(sqlite3*, void*, int, const char*const*,
290                            sqlite3_vtab**,char**);
291 static int csvtabConnect(sqlite3*, void*, int, const char*const*,
292                            sqlite3_vtab**,char**);
293 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
294 static int csvtabDisconnect(sqlite3_vtab*);
295 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
296 static int csvtabClose(sqlite3_vtab_cursor*);
297 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
298                           int argc, sqlite3_value **argv);
299 static int csvtabNext(sqlite3_vtab_cursor*);
300 static int csvtabEof(sqlite3_vtab_cursor*);
301 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
302 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
303 
304 /* An instance of the CSV virtual table */
305 typedef struct CsvTable {
306   sqlite3_vtab base;              /* Base class.  Must be first */
307   char *zFilename;                /* Name of the CSV file */
308   char *zData;                    /* Raw CSV data in lieu of zFilename */
309   long iStart;                    /* Offset to start of data in zFilename */
310   int nCol;                       /* Number of columns in the CSV file */
311   unsigned int tstFlags;          /* Bit values used for testing */
312 } CsvTable;
313 
314 /* Allowed values for tstFlags */
315 #define CSVTEST_FIDX  0x0001      /* Pretend that constrained searchs cost less*/
316 
317 /* A cursor for the CSV virtual table */
318 typedef struct CsvCursor {
319   sqlite3_vtab_cursor base;       /* Base class.  Must be first */
320   CsvReader rdr;                  /* The CsvReader object */
321   char **azVal;                   /* Value of the current row */
322   int *aLen;                      /* Length of each entry */
323   sqlite3_int64 iRowid;           /* The current rowid.  Negative for EOF */
324 } CsvCursor;
325 
326 /* Transfer error message text from a reader into a CsvTable */
327 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
328   sqlite3_free(pTab->base.zErrMsg);
329   pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
330 }
331 
332 /*
333 ** This method is the destructor fo a CsvTable object.
334 */
335 static int csvtabDisconnect(sqlite3_vtab *pVtab){
336   CsvTable *p = (CsvTable*)pVtab;
337   sqlite3_free(p->zFilename);
338   sqlite3_free(p->zData);
339   sqlite3_free(p);
340   return SQLITE_OK;
341 }
342 
/*
** Return a pointer to the first character of z that is not whitespace
** according to isspace().  If z holds nothing but whitespace, the result
** points at its zero terminator.
*/
static const char *csv_skip_whitespace(const char *z){
  const char *zCur;
  for(zCur=z; isspace((unsigned char)*zCur); zCur++){}
  return zCur;
}
349 
/*
** Remove trailing whitespace from string z[], in place.
**
** n is the length of the text that is kept.  The character examined is
** z[n-1], the last one still kept; z[n] is the zero terminator and is
** never whitespace.  A string made up only of whitespace becomes "".
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
356 
/*
** Remove quoting from string z[], in place.
**
** Nothing happens unless z begins with ' or " and ends with that same
** character.  Otherwise the outer quotes are dropped and every doubled
** quote character inside is reduced to a single one.
*/
static void csv_dequote(char *z){
  const char q = z[0];
  size_t nByte, iSrc, iDst;

  if( q!='\'' && q!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=q ) return;
  iDst = 0;
  iSrc = 1;
  while( iSrc<nByte-1 ){
    /* Skip the first quote of a doubled pair; the second one is copied */
    if( z[iSrc]==q && z[iSrc+1]==q ) iSrc++;
    z[iDst++] = z[iSrc++];
  }
  z[iDst] = 0;
}
372 
/*
** Test whether z has the form "TAG = VALUE", where whitespace may appear
** before TAG and on either side of the "=".  zTag is the tag to look for
** and nTag is its length in bytes; the comparison is case sensitive.
**
** Return a pointer to the first character of VALUE on a match, or NULL
** if z does not have this form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur+1);
    }
  }
  return 0;
}
384 
385 /* Decode a parameter that requires a dequoted string.
386 **
387 ** Return 1 if the parameter is seen, or 0 if not.  1 is returned
388 ** even if there is an error.  If an error occurs, then an error message
389 ** is left in p->zErr.  If there are no errors, p->zErr[0]==0.
390 */
391 static int csv_string_parameter(
392   CsvReader *p,            /* Leave the error message here, if there is one */
393   const char *zParam,      /* Parameter we are checking for */
394   const char *zArg,        /* Raw text of the virtual table argment */
395   char **pzVal             /* Write the dequoted string value here */
396 ){
397   const char *zValue;
398   zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
399   if( zValue==0 ) return 0;
400   p->zErr[0] = 0;
401   if( *pzVal ){
402     csv_errmsg(p, "more than one '%s' parameter", zParam);
403     return 1;
404   }
405   *pzVal = sqlite3_mprintf("%s", zValue);
406   if( *pzVal==0 ){
407     csv_errmsg(p, "out of memory");
408     return 1;
409   }
410   csv_trim_whitespace(*pzVal);
411   csv_dequote(*pzVal);
412   return 1;
413 }
414 
415 
/*
** Interpret z as a boolean.  "yes", "on", "true" (in any letter case)
** and "1" give 1.  "no", "off", "false" (in any letter case) and "0"
** give 0.  Anything else gives -1.
*/
static int csv_boolean(const char *z){
  static const char *const azYes[] = { "yes", "on", "true" };
  static const char *const azNo[] = { "no", "off", "false" };
  unsigned int k;

  for(k=0; k<sizeof(azYes)/sizeof(azYes[0]); k++){
    if( sqlite3_stricmp(azYes[k], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;
  for(k=0; k<sizeof(azNo)/sizeof(azNo[0]); k++){
    if( sqlite3_stricmp(azNo[k], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;
  return -1;
}
436 
437 
438 /*
439 ** Parameters:
440 **    filename=FILENAME          Name of file containing CSV content
441 **    data=TEXT                  Direct CSV content.
442 **    schema=SCHEMA              Alternative CSV schema.
443 **    header=YES|NO              First row of CSV defines the names of
444 **                               columns if "yes".  Default "no".
445 **    columns=N                  Assume the CSV file contains N columns.
446 **
447 ** Only available if compiled with SQLITE_TEST:
448 **
449 **    testflags=N                Bitmask of test flags.  Optional
450 **
451 ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
452 ** and so forth.  If columns=N is omitted, then the file is opened and
453 ** the number of columns in the first row is counted to determine the
454 ** column count.  If header=YES, then the first row is skipped.
455 */
456 static int csvtabConnect(
457   sqlite3 *db,
458   void *pAux,
459   int argc, const char *const*argv,
460   sqlite3_vtab **ppVtab,
461   char **pzErr
462 ){
463   CsvTable *pNew = 0;        /* The CsvTable object to construct */
464   int bHeader = -1;          /* header= flags.  -1 means not seen yet */
465   int rc = SQLITE_OK;        /* Result code from this routine */
466   int i, j;                  /* Loop counters */
467 #ifdef SQLITE_TEST
468   int tstFlags = 0;          /* Value for testflags=N parameter */
469 #endif
470   int nCol = -99;            /* Value of the columns= parameter */
471   CsvReader sRdr;            /* A CSV file reader used to store an error
472                              ** message and/or to count the number of columns */
473   static const char *azParam[] = {
474      "filename", "data", "schema",
475   };
476   char *azPValue[3];         /* Parameter values */
477 # define CSV_FILENAME (azPValue[0])
478 # define CSV_DATA     (azPValue[1])
479 # define CSV_SCHEMA   (azPValue[2])
480 
481 
482   assert( sizeof(azPValue)==sizeof(azParam) );
483   memset(&sRdr, 0, sizeof(sRdr));
484   memset(azPValue, 0, sizeof(azPValue));
485   for(i=3; i<argc; i++){
486     const char *z = argv[i];
487     const char *zValue;
488     for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
489       if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
490     }
491     if( j<sizeof(azParam)/sizeof(azParam[0]) ){
492       if( sRdr.zErr[0] ) goto csvtab_connect_error;
493     }else
494     if( (zValue = csv_parameter("header",6,z))!=0 ){
495       int x;
496       if( bHeader>=0 ){
497         csv_errmsg(&sRdr, "more than one 'header' parameter");
498         goto csvtab_connect_error;
499       }
500       x = csv_boolean(zValue);
501       if( x==1 ){
502         bHeader = 1;
503       }else if( x==0 ){
504         bHeader = 0;
505       }else{
506         csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
507         goto csvtab_connect_error;
508       }
509     }else
510 #ifdef SQLITE_TEST
511     if( (zValue = csv_parameter("testflags",9,z))!=0 ){
512       tstFlags = (unsigned int)atoi(zValue);
513     }else
514 #endif
515     if( (zValue = csv_parameter("columns",7,z))!=0 ){
516       if( nCol>0 ){
517         csv_errmsg(&sRdr, "more than one 'columns' parameter");
518         goto csvtab_connect_error;
519       }
520       nCol = atoi(zValue);
521       if( nCol<=0 ){
522         csv_errmsg(&sRdr, "must have at least one column");
523         goto csvtab_connect_error;
524       }
525     }else
526     {
527       csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
528       goto csvtab_connect_error;
529     }
530   }
531   if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
532     csv_errmsg(&sRdr, "must either filename= or data= but not both");
533     goto csvtab_connect_error;
534   }
535   if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
536     goto csvtab_connect_error;
537   }
538   pNew = sqlite3_malloc( sizeof(*pNew) );
539   *ppVtab = (sqlite3_vtab*)pNew;
540   if( pNew==0 ) goto csvtab_connect_oom;
541   memset(pNew, 0, sizeof(*pNew));
542   if( nCol>0 ){
543     pNew->nCol = nCol;
544   }else{
545     do{
546       const char *z = csv_read_one_field(&sRdr);
547       if( z==0 ) goto csvtab_connect_oom;
548       pNew->nCol++;
549     }while( sRdr.cTerm==',' );
550   }
551   pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
552   pNew->zData = CSV_DATA;          CSV_DATA = 0;
553 #ifdef SQLITE_TEST
554   pNew->tstFlags = tstFlags;
555 #endif
556   pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
557   csv_reader_reset(&sRdr);
558   if( CSV_SCHEMA==0 ){
559     char *zSep = "";
560     CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
561     if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
562     for(i=0; i<pNew->nCol; i++){
563       CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
564       zSep = ",";
565     }
566     CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
567   }
568   rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
569   if( rc ) goto csvtab_connect_error;
570   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
571     sqlite3_free(azPValue[i]);
572   }
573   return SQLITE_OK;
574 
575 csvtab_connect_oom:
576   rc = SQLITE_NOMEM;
577   csv_errmsg(&sRdr, "out of memory");
578 
579 csvtab_connect_error:
580   if( pNew ) csvtabDisconnect(&pNew->base);
581   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
582     sqlite3_free(azPValue[i]);
583   }
584   if( sRdr.zErr[0] ){
585     sqlite3_free(*pzErr);
586     *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
587   }
588   csv_reader_reset(&sRdr);
589   if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
590   return rc;
591 }
592 
593 /*
594 ** Reset the current row content held by a CsvCursor.
595 */
596 static void csvtabCursorRowReset(CsvCursor *pCur){
597   CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
598   int i;
599   for(i=0; i<pTab->nCol; i++){
600     sqlite3_free(pCur->azVal[i]);
601     pCur->azVal[i] = 0;
602     pCur->aLen[i] = 0;
603   }
604 }
605 
606 /*
607 ** The xConnect and xCreate methods do the same thing, but they must be
608 ** different so that the virtual table is not an eponymous virtual table.
609 */
610 static int csvtabCreate(
611   sqlite3 *db,
612   void *pAux,
613   int argc, const char *const*argv,
614   sqlite3_vtab **ppVtab,
615   char **pzErr
616 ){
617  return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
618 }
619 
620 /*
621 ** Destructor for a CsvCursor.
622 */
623 static int csvtabClose(sqlite3_vtab_cursor *cur){
624   CsvCursor *pCur = (CsvCursor*)cur;
625   csvtabCursorRowReset(pCur);
626   csv_reader_reset(&pCur->rdr);
627   sqlite3_free(cur);
628   return SQLITE_OK;
629 }
630 
631 /*
632 ** Constructor for a new CsvTable cursor object.
633 */
634 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
635   CsvTable *pTab = (CsvTable*)p;
636   CsvCursor *pCur;
637   size_t nByte;
638   nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
639   pCur = sqlite3_malloc64( nByte );
640   if( pCur==0 ) return SQLITE_NOMEM;
641   memset(pCur, 0, nByte);
642   pCur->azVal = (char**)&pCur[1];
643   pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
644   *ppCursor = &pCur->base;
645   if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
646     csv_xfer_error(pTab, &pCur->rdr);
647     return SQLITE_ERROR;
648   }
649   return SQLITE_OK;
650 }
651 
652 
653 /*
654 ** Advance a CsvCursor to its next row of input.
655 ** Set the EOF marker if we reach the end of input.
656 */
657 static int csvtabNext(sqlite3_vtab_cursor *cur){
658   CsvCursor *pCur = (CsvCursor*)cur;
659   CsvTable *pTab = (CsvTable*)cur->pVtab;
660   int i = 0;
661   char *z;
662   do{
663     z = csv_read_one_field(&pCur->rdr);
664     if( z==0 ){
665       csv_xfer_error(pTab, &pCur->rdr);
666       break;
667     }
668     if( i<pTab->nCol ){
669       if( pCur->aLen[i] < pCur->rdr.n+1 ){
670         char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
671         if( zNew==0 ){
672           csv_errmsg(&pCur->rdr, "out of memory");
673           csv_xfer_error(pTab, &pCur->rdr);
674           break;
675         }
676         pCur->azVal[i] = zNew;
677         pCur->aLen[i] = pCur->rdr.n+1;
678       }
679       memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
680       i++;
681     }
682   }while( pCur->rdr.cTerm==',' );
683   if( z==0 || (pCur->rdr.cTerm==EOF && i<pTab->nCol) ){
684     pCur->iRowid = -1;
685   }else{
686     pCur->iRowid++;
687     while( i<pTab->nCol ){
688       sqlite3_free(pCur->azVal[i]);
689       pCur->azVal[i] = 0;
690       pCur->aLen[i] = 0;
691       i++;
692     }
693   }
694   return SQLITE_OK;
695 }
696 
697 /*
698 ** Return values of columns for the row at which the CsvCursor
699 ** is currently pointing.
700 */
701 static int csvtabColumn(
702   sqlite3_vtab_cursor *cur,   /* The cursor */
703   sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
704   int i                       /* Which column to return */
705 ){
706   CsvCursor *pCur = (CsvCursor*)cur;
707   CsvTable *pTab = (CsvTable*)cur->pVtab;
708   if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
709     sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
710   }
711   return SQLITE_OK;
712 }
713 
714 /*
715 ** Return the rowid for the current row.
716 */
717 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
718   CsvCursor *pCur = (CsvCursor*)cur;
719   *pRowid = pCur->iRowid;
720   return SQLITE_OK;
721 }
722 
723 /*
724 ** Return TRUE if the cursor has been moved off of the last
725 ** row of output.
726 */
727 static int csvtabEof(sqlite3_vtab_cursor *cur){
728   CsvCursor *pCur = (CsvCursor*)cur;
729   return pCur->iRowid<0;
730 }
731 
732 /*
733 ** Only a full table scan is supported.  So xFilter simply rewinds to
734 ** the beginning.
735 */
736 static int csvtabFilter(
737   sqlite3_vtab_cursor *pVtabCursor,
738   int idxNum, const char *idxStr,
739   int argc, sqlite3_value **argv
740 ){
741   CsvCursor *pCur = (CsvCursor*)pVtabCursor;
742   CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
743   pCur->iRowid = 0;
744   if( pCur->rdr.in==0 ){
745     assert( pCur->rdr.zIn==pTab->zData );
746     assert( pTab->iStart>=0 );
747     assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
748     pCur->rdr.iIn = pTab->iStart;
749   }else{
750     fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
751     pCur->rdr.iIn = 0;
752     pCur->rdr.nIn = 0;
753   }
754   return csvtabNext(pVtabCursor);
755 }
756 
757 /*
758 ** Only a forward full table scan is supported.  xBestIndex is mostly
759 ** a no-op.  If CSVTEST_FIDX is set, then the presence of equality
760 ** constraints lowers the estimated cost, which is fiction, but is useful
761 ** for testing certain kinds of virtual table behavior.
762 */
763 static int csvtabBestIndex(
764   sqlite3_vtab *tab,
765   sqlite3_index_info *pIdxInfo
766 ){
767   pIdxInfo->estimatedCost = 1000000;
768 #ifdef SQLITE_TEST
769   if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
770     /* The usual (and sensible) case is to always do a full table scan.
771     ** The code in this branch only runs when testflags=1.  This code
772     ** generates an artifical and unrealistic plan which is useful
773     ** for testing virtual table logic but is not helpful to real applications.
774     **
775     ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
776     ** table (even though it is not) and the cost of running the virtual table
777     ** is reduced from 1 million to just 10.  The constraints are *not* marked
778     ** as omittable, however, so the query planner should still generate a
779     ** plan that gives a correct answer, even if they plan is not optimal.
780     */
781     int i;
782     int nConst = 0;
783     for(i=0; i<pIdxInfo->nConstraint; i++){
784       unsigned char op;
785       if( pIdxInfo->aConstraint[i].usable==0 ) continue;
786       op = pIdxInfo->aConstraint[i].op;
787       if( op==SQLITE_INDEX_CONSTRAINT_EQ
788        || op==SQLITE_INDEX_CONSTRAINT_LIKE
789        || op==SQLITE_INDEX_CONSTRAINT_GLOB
790       ){
791         pIdxInfo->estimatedCost = 10;
792         pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
793         nConst++;
794       }
795     }
796   }
797 #endif
798   return SQLITE_OK;
799 }
800 
801 
802 static sqlite3_module CsvModule = {
803   0,                       /* iVersion */
804   csvtabCreate,            /* xCreate */
805   csvtabConnect,           /* xConnect */
806   csvtabBestIndex,         /* xBestIndex */
807   csvtabDisconnect,        /* xDisconnect */
808   csvtabDisconnect,        /* xDestroy */
809   csvtabOpen,              /* xOpen - open a cursor */
810   csvtabClose,             /* xClose - close a cursor */
811   csvtabFilter,            /* xFilter - configure scan constraints */
812   csvtabNext,              /* xNext - advance a cursor */
813   csvtabEof,               /* xEof - check for end of scan */
814   csvtabColumn,            /* xColumn - read data */
815   csvtabRowid,             /* xRowid - read data */
816   0,                       /* xUpdate */
817   0,                       /* xBegin */
818   0,                       /* xSync */
819   0,                       /* xCommit */
820   0,                       /* xRollback */
821   0,                       /* xFindMethod */
822   0,                       /* xRename */
823 };
824 
#ifdef SQLITE_TEST
/*
** For virtual table testing there is a second version of the CSV module
** that has an xUpdate method.  That method refuses every change by
** returning SQLITE_READONLY, since the CSV file is not really writable.
*/
static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
  (void)p;
  (void)n;
  (void)v;
  (void)x;
  return SQLITE_READONLY;
}

/* Same as CsvModule except that the xUpdate slot is filled in */
static sqlite3_module CsvModuleFauxWrite = {
  0,                       /* iVersion */
  csvtabCreate,            /* xCreate - same work as xConnect */
  csvtabConnect,           /* xConnect - parse arguments, build the table */
  csvtabBestIndex,         /* xBestIndex - always a full scan */
  csvtabDisconnect,        /* xDisconnect - free the table */
  csvtabDisconnect,        /* xDestroy - free the table */
  csvtabOpen,              /* xOpen - allocate a cursor */
  csvtabClose,             /* xClose - free a cursor */
  csvtabFilter,            /* xFilter - rewind to the first data row */
  csvtabNext,              /* xNext - read the next row */
  csvtabEof,               /* xEof - test for end of scan */
  csvtabColumn,            /* xColumn - value of one column */
  csvtabRowid,             /* xRowid - rowid of the current row */
  csvtabUpdate,            /* xUpdate - always SQLITE_READONLY */
  0,                       /* xBegin */
  0,                       /* xSync */
  0,                       /* xCommit */
  0,                       /* xRollback */
  0,                       /* xFindMethod */
  0,                       /* xRename */
};
#endif /* SQLITE_TEST */
857 
858 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
859 
860 
861 #ifdef _WIN32
862 __declspec(dllexport)
863 #endif
864 /*
865 ** This routine is called when the extension is loaded.  The new
866 ** CSV virtual table module is registered with the calling database
867 ** connection.
868 */
869 int sqlite3_csv_init(
870   sqlite3 *db,
871   char **pzErrMsg,
872   const sqlite3_api_routines *pApi
873 ){
874 #ifndef SQLITE_OMIT_VIRTUALTABLE
875   int rc;
876   SQLITE_EXTENSION_INIT2(pApi);
877   rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
878 #ifdef SQLITE_TEST
879   if( rc==SQLITE_OK ){
880     rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
881   }
882 #endif
883   return rc;
884 #else
885   return SQLITE_OK;
886 #endif
887 }
888