xref: /sqlite-3.40.0/ext/misc/csv.c (revision ac9c3d2c)
1 /*
2 ** 2016-05-28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains the implementation of an SQLite virtual table for
14 ** reading CSV files.
15 **
16 ** Usage:
17 **
18 **    .load ./csv
19 **    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20 **    SELECT * FROM csv;
21 **
** The columns are named "c0", "c1", "c2", ... by default.  But the
23 ** application can define its own CREATE TABLE statement as an additional
24 ** parameter.  For example:
25 **
26 **    CREATE VIRTUAL TABLE temp.csv2 USING csv(
27 **       filename = "../http.log",
28 **       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29 **    );
30 **
31 ** Instead of specifying a file, the text of the CSV can be loaded using
32 ** the data= parameter.
33 **
34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have
35 ** N columns.  If the columns parameter is omitted, the CSV file is opened
36 ** as soon as the virtual table is constructed and the first row of the CSV
** is read in order to count the columns.
38 **
39 ** Some extra debugging features (used for testing virtual tables) are available
40 ** if this module is compiled with -DSQLITE_TEST.
41 */
42 #include <sqlite3ext.h>
43 SQLITE_EXTENSION_INIT1
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <stdarg.h>
48 #include <ctype.h>
49 #include <stdio.h>
50 
/*
** A macro to hint to the compiler that a function should not be
** inlined.
**
** It expands to the GCC-style attribute when __GNUC__ is defined, to the
** MSVC declspec when _MSC_VER is 1310 or later, and to nothing on any
** other compiler.  In this file it is applied to csv_getc_refill() and
** csv_resize_and_append(), the two helpers that are only reached when a
** buffer is exhausted.
*/
#if defined(__GNUC__)
#  define CSV_NOINLINE  __attribute__((noinline))
#elif defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
#else
#  define CSV_NOINLINE
#endif
62 
63 
/* Max size of the error message in a CsvReader.  This is the size in
** bytes of the CsvReader.zErr[] array and the limit passed to
** sqlite3_vsnprintf() by csv_errmsg(). */
#define CSV_MXERR 200

/* Size of the CsvReader input buffer.  This many bytes are allocated for
** CsvReader.zIn when reading from a file, and at most this many bytes are
** requested from fread() on each refill. */
#define CSV_INBUFSZ 1024
69 
70 /* A context object used when read a CSV file. */
71 typedef struct CsvReader CsvReader;
72 struct CsvReader {
73   FILE *in;              /* Read the CSV text from this input stream */
74   char *z;               /* Accumulated text for a field */
75   int n;                 /* Number of bytes in z */
76   int nAlloc;            /* Space allocated for z[] */
77   int nLine;             /* Current line number */
78   char cTerm;            /* Character that terminated the most recent field */
79   size_t iIn;            /* Next unread character in the input buffer */
80   size_t nIn;            /* Number of characters in the input buffer */
81   char *zIn;             /* The input buffer */
82   char zErr[CSV_MXERR];  /* Error message */
83 };
84 
85 /* Initialize a CsvReader object */
86 static void csv_reader_init(CsvReader *p){
87   p->in = 0;
88   p->z = 0;
89   p->n = 0;
90   p->nAlloc = 0;
91   p->nLine = 0;
92   p->nIn = 0;
93   p->zIn = 0;
94   p->zErr[0] = 0;
95 }
96 
97 /* Close and reset a CsvReader object */
98 static void csv_reader_reset(CsvReader *p){
99   if( p->in ){
100     fclose(p->in);
101     sqlite3_free(p->zIn);
102   }
103   sqlite3_free(p->z);
104   csv_reader_init(p);
105 }
106 
107 /* Report an error on a CsvReader */
108 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
109   va_list ap;
110   va_start(ap, zFormat);
111   sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
112   va_end(ap);
113 }
114 
115 /* Open the file associated with a CsvReader
116 ** Return the number of errors.
117 */
118 static int csv_reader_open(
119   CsvReader *p,               /* The reader to open */
120   const char *zFilename,      /* Read from this filename */
121   const char *zData           /*  ... or use this data */
122 ){
123   if( zFilename ){
124     p->zIn = sqlite3_malloc( CSV_INBUFSZ );
125     if( p->zIn==0 ){
126       csv_errmsg(p, "out of memory");
127       return 1;
128     }
129     p->in = fopen(zFilename, "rb");
130     if( p->in==0 ){
131       csv_reader_reset(p);
132       csv_errmsg(p, "cannot open '%s' for reading", zFilename);
133       return 1;
134     }
135   }else{
136     assert( p->in==0 );
137     p->zIn = (char*)zData;
138     p->nIn = strlen(zData);
139   }
140   return 0;
141 }
142 
143 /* The input buffer has overflowed.  Refill the input buffer, then
144 ** return the next character
145 */
146 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
147   size_t got;
148 
149   assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
150   assert( p->in!=0 );        /* Only called if reading froma file */
151 
152   got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
153   if( got==0 ) return EOF;
154   p->nIn = got;
155   p->iIn = 1;
156   return p->zIn[0];
157 }
158 
159 /* Return the next character of input.  Return EOF at end of input. */
160 static int csv_getc(CsvReader *p){
161   if( p->iIn >= p->nIn ){
162     if( p->in!=0 ) return csv_getc_refill(p);
163     return EOF;
164   }
165   return p->zIn[p->iIn++];
166 }
167 
168 /* Increase the size of p->z and append character c to the end.
169 ** Return 0 on success and non-zero if there is an OOM error */
170 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
171   char *zNew;
172   int nNew = p->nAlloc*2 + 100;
173   zNew = sqlite3_realloc64(p->z, nNew);
174   if( zNew ){
175     p->z = zNew;
176     p->nAlloc = nNew;
177     p->z[p->n++] = c;
178     return 0;
179   }else{
180     csv_errmsg(p, "out of memory");
181     return 1;
182   }
183 }
184 
185 /* Append a single character to the CsvReader.z[] array.
186 ** Return 0 on success and non-zero if there is an OOM error */
187 static int csv_append(CsvReader *p, char c){
188   if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
189   p->z[p->n++] = c;
190   return 0;
191 }
192 
193 /* Read a single field of CSV text.  Compatible with rfc4180 and extended
194 ** with the option of having a separator other than ",".
195 **
196 **   +  Input comes from p->in.
197 **   +  Store results in p->z of length p->n.  Space to hold p->z comes
198 **      from sqlite3_malloc64().
199 **   +  Keep track of the line number in p->nLine.
200 **   +  Store the character that terminates the field in p->cTerm.  Store
201 **      EOF on end-of-file.
202 **
203 ** Return "" at EOF.  Return 0 on an OOM error.
204 */
205 static char *csv_read_one_field(CsvReader *p){
206   int c;
207   p->n = 0;
208   c = csv_getc(p);
209   if( c==EOF ){
210     p->cTerm = EOF;
211     return "";
212   }
213   if( c=='"' ){
214     int pc, ppc;
215     int startLine = p->nLine;
216     pc = ppc = 0;
217     while( 1 ){
218       c = csv_getc(p);
219       if( c<='"' || pc=='"' ){
220         if( c=='\n' ) p->nLine++;
221         if( c=='"' ){
222           if( pc=='"' ){
223             pc = 0;
224             continue;
225           }
226         }
227         if( (c==',' && pc=='"')
228          || (c=='\n' && pc=='"')
229          || (c=='\n' && pc=='\r' && ppc=='"')
230          || (c==EOF && pc=='"')
231         ){
232           do{ p->n--; }while( p->z[p->n]!='"' );
233           p->cTerm = c;
234           break;
235         }
236         if( pc=='"' && c!='\r' ){
237           csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
238           break;
239         }
240         if( c==EOF ){
241           csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
242                      startLine, '"');
243           p->cTerm = c;
244           break;
245         }
246       }
247       if( csv_append(p, (char)c) ) return 0;
248       ppc = pc;
249       pc = c;
250     }
251   }else{
252     while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
253       if( csv_append(p, (char)c) ) return 0;
254       c = csv_getc(p);
255     }
256     if( c=='\n' ){
257       p->nLine++;
258       if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
259     }
260     p->cTerm = c;
261   }
262   if( p->z ) p->z[p->n] = 0;
263   return p->z;
264 }
265 
266 
/* Forward references to the various virtual table methods implemented
** in this file.  These are the functions placed into the sqlite3_module
** method tables defined further down. */
static int csvtabCreate(sqlite3*, void*, int, const char*const*,
                           sqlite3_vtab**,char**);
static int csvtabConnect(sqlite3*, void*, int, const char*const*,
                           sqlite3_vtab**,char**);
static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
static int csvtabDisconnect(sqlite3_vtab*);
static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
static int csvtabClose(sqlite3_vtab_cursor*);
static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
                          int argc, sqlite3_value **argv);
static int csvtabNext(sqlite3_vtab_cursor*);
static int csvtabEof(sqlite3_vtab_cursor*);
static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
283 
/* An instance of the CSV virtual table.
**
** zFilename and zData are released with sqlite3_free() by
** csvtabDisconnect().  csvtabConnect() requires that exactly one of the
** two be supplied.
*/
typedef struct CsvTable {
  sqlite3_vtab base;              /* Base class.  Must be first */
  char *zFilename;                /* Name of the CSV file */
  char *zData;                    /* Raw CSV data in lieu of zFilename */
  long iStart;                    /* Offset to start of data in zFilename.
                                  ** csvtabFilter() uses it as the fseek()
                                  ** offset, or as the starting index into
                                  ** zData when there is no file. */
  int nCol;                       /* Number of columns in the CSV file */
  unsigned int tstFlags;          /* Bit values used for testing */
} CsvTable;
293 
/* Allowed values for tstFlags */
#define CSVTEST_FIDX  0x0001      /* Pretend that constrained searches cost less*/
296 
/* A cursor for the CSV virtual table.
**
** csvtabOpen() allocates the azVal[] and aLen[] arrays in the same
** memory block as the cursor itself, immediately after the structure,
** with one entry per table column.
*/
typedef struct CsvCursor {
  sqlite3_vtab_cursor base;       /* Base class.  Must be first */
  CsvReader rdr;                  /* The CsvReader object */
  char **azVal;                   /* Value of the current row */
  int *aLen;                      /* Length of each entry: the number of
                                  ** bytes allocated for azVal[i] */
  sqlite3_int64 iRowid;           /* The current rowid.  Negative for EOF */
} CsvCursor;
305 
306 /* Transfer error message text from a reader into a CsvTable */
307 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
308   sqlite3_free(pTab->base.zErrMsg);
309   pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
310 }
311 
312 /*
313 ** This method is the destructor fo a CsvTable object.
314 */
315 static int csvtabDisconnect(sqlite3_vtab *pVtab){
316   CsvTable *p = (CsvTable*)pVtab;
317   sqlite3_free(p->zFilename);
318   sqlite3_free(p->zData);
319   sqlite3_free(p);
320   return SQLITE_OK;
321 }
322 
/* Advance past any leading whitespace in z.  The result points at the
** first character for which isspace() is false, which is the zero
** terminator when the string consists of whitespace only. */
static const char *csv_skip_whitespace(const char *z){
  for(; isspace((unsigned char)*z); z++){
    /* no-op: the loop header does all the work */
  }
  return z;
}
329 
/* Remove trailing whitespace from the end of string z[].
**
** The string is shortened in place by writing a new zero terminator
** after the last non-whitespace character.  A string consisting only
** of whitespace becomes the empty string.
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  /* z[n] is the terminator, so the last real character is z[n-1] */
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
336 
/* Dequote the string z[] in place.
**
** Nothing happens unless z begins with a single-quote or double-quote
** character, is at least two bytes long, and ends with the same
** character it began with.  Otherwise the enclosing quotes are removed
** and every doubled quote character inside is reduced to a single one.
*/
static void csv_dequote(char *z){
  char cQuote = z[0];
  size_t nByte;            /* Length of z including both quotes */
  size_t iFrom;            /* Next byte to read */
  size_t iTo;              /* Next byte to write */

  if( cQuote!='\'' && cQuote!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=cQuote ) return;
  iTo = 0;
  iFrom = 1;
  while( iFrom<nByte-1 ){
    if( z[iFrom]==cQuote && z[iFrom+1]==cQuote ) iFrom++;
    z[iTo++] = z[iFrom++];
  }
  z[iTo] = 0;
}
352 
/* Test whether z has the form "TAG = VALUE", where whitespace may
** appear before TAG and on either side of the "=".  zTag is the tag
** text and nTag is its length in bytes.
**
** On a match, return a pointer to the first non-whitespace character
** following the "=".  Return NULL if z does not have this form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(zCur+1);
    }
  }
  return 0;
}
364 
365 /* Decode a parameter that requires a dequoted string.
366 **
367 ** Return 1 if the parameter is seen, or 0 if not.  1 is returned
368 ** even if there is an error.  If an error occurs, then an error message
369 ** is left in p->zErr.  If there are no errors, p->zErr[0]==0.
370 */
371 static int csv_string_parameter(
372   CsvReader *p,            /* Leave the error message here, if there is one */
373   const char *zParam,      /* Parameter we are checking for */
374   const char *zArg,        /* Raw text of the virtual table argment */
375   char **pzVal             /* Write the dequoted string value here */
376 ){
377   const char *zValue;
378   zValue = csv_parameter(zParam,strlen(zParam),zArg);
379   if( zValue==0 ) return 0;
380   p->zErr[0] = 0;
381   if( *pzVal ){
382     csv_errmsg(p, "more than one '%s' parameter", zParam);
383     return 1;
384   }
385   *pzVal = sqlite3_mprintf("%s", zValue);
386   if( *pzVal==0 ){
387     csv_errmsg(p, "out of memory");
388     return 1;
389   }
390   csv_trim_whitespace(*pzVal);
391   csv_dequote(*pzVal);
392   return 1;
393 }
394 
395 
/* Return 0 if the argument is false and 1 if it is true.  Return -1 if
** we cannot really tell.
**
** True values are "yes", "on", "true" (compared case-insensitively) and
** the single character "1".  False values are "no", "off", "false" and
** the single character "0".  The whole string must match; no whitespace
** is skipped.
*/
static int csv_boolean(const char *z){
  if( sqlite3_stricmp("yes",z)==0
   || sqlite3_stricmp("on",z)==0
   || sqlite3_stricmp("true",z)==0
   || (z[0]=='1' && z[1]==0)
  ){
    return 1;
  }
  if( sqlite3_stricmp("no",z)==0
   || sqlite3_stricmp("off",z)==0
   || sqlite3_stricmp("false",z)==0
   || (z[0]=='0' && z[1]==0)
  ){
    return 0;
  }
  return -1;
}
416 
417 
418 /*
419 ** Parameters:
420 **    filename=FILENAME          Name of file containing CSV content
421 **    data=TEXT                  Direct CSV content.
422 **    schema=SCHEMA              Alternative CSV schema.
423 **    header=YES|NO              First row of CSV defines the names of
424 **                               columns if "yes".  Default "no".
425 **    columns=N                  Assume the CSV file contains N columns.
426 **
427 ** Only available if compiled with SQLITE_TEST:
428 **
429 **    testflags=N                Bitmask of test flags.  Optional
430 **
431 ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
432 ** and so forth.  If columns=N is omitted, then the file is opened and
433 ** the number of columns in the first row is counted to determine the
434 ** column count.  If header=YES, then the first row is skipped.
435 */
436 static int csvtabConnect(
437   sqlite3 *db,
438   void *pAux,
439   int argc, const char *const*argv,
440   sqlite3_vtab **ppVtab,
441   char **pzErr
442 ){
443   CsvTable *pNew = 0;        /* The CsvTable object to construct */
444   int bHeader = -1;          /* header= flags.  -1 means not seen yet */
445   int rc = SQLITE_OK;        /* Result code from this routine */
446   int i, j;                  /* Loop counters */
447 #ifdef SQLITE_TEST
448   int tstFlags = 0;          /* Value for testflags=N parameter */
449 #endif
450   int nCol = -99;            /* Value of the columns= parameter */
451   CsvReader sRdr;            /* A CSV file reader used to store an error
452                              ** message and/or to count the number of columns */
453   static const char *azParam[] = {
454      "filename", "data", "schema",
455   };
456   char *azPValue[3];         /* Parameter values */
457 # define CSV_FILENAME (azPValue[0])
458 # define CSV_DATA     (azPValue[1])
459 # define CSV_SCHEMA   (azPValue[2])
460 
461 
462   assert( sizeof(azPValue)==sizeof(azParam) );
463   memset(&sRdr, 0, sizeof(sRdr));
464   memset(azPValue, 0, sizeof(azPValue));
465   for(i=3; i<argc; i++){
466     const char *z = argv[i];
467     const char *zValue;
468     for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
469       if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
470     }
471     if( j<sizeof(azParam)/sizeof(azParam[0]) ){
472       if( sRdr.zErr[0] ) goto csvtab_connect_error;
473     }else
474     if( (zValue = csv_parameter("header",6,z))!=0 ){
475       int x;
476       if( bHeader>=0 ){
477         csv_errmsg(&sRdr, "more than one 'header' parameter");
478         goto csvtab_connect_error;
479       }
480       x = csv_boolean(zValue);
481       if( x==1 ){
482         bHeader = 1;
483       }else if( x==0 ){
484         bHeader = 0;
485       }else{
486         csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
487         goto csvtab_connect_error;
488       }
489     }else
490 #ifdef SQLITE_TEST
491     if( (zValue = csv_parameter("testflags",9,z))!=0 ){
492       tstFlags = (unsigned int)atoi(zValue);
493     }else
494 #endif
495     if( (zValue = csv_parameter("columns",7,z))!=0 ){
496       if( nCol>0 ){
497         csv_errmsg(&sRdr, "more than one 'columns' parameter");
498         goto csvtab_connect_error;
499       }
500       nCol = atoi(zValue);
501       if( nCol<=0 ){
502         csv_errmsg(&sRdr, "must have at least one column");
503         goto csvtab_connect_error;
504       }
505     }else
506     {
507       csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
508       goto csvtab_connect_error;
509     }
510   }
511   if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
512     csv_errmsg(&sRdr, "must either filename= or data= but not both");
513     goto csvtab_connect_error;
514   }
515   if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
516     goto csvtab_connect_error;
517   }
518   pNew = sqlite3_malloc( sizeof(*pNew) );
519   *ppVtab = (sqlite3_vtab*)pNew;
520   if( pNew==0 ) goto csvtab_connect_oom;
521   memset(pNew, 0, sizeof(*pNew));
522   if( nCol>0 ){
523     pNew->nCol = nCol;
524   }else{
525     do{
526       const char *z = csv_read_one_field(&sRdr);
527       if( z==0 ) goto csvtab_connect_oom;
528       pNew->nCol++;
529     }while( sRdr.cTerm==',' );
530   }
531   pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
532   pNew->zData = CSV_DATA;          CSV_DATA = 0;
533 #ifdef SQLITE_TEST
534   pNew->tstFlags = tstFlags;
535 #endif
536   pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
537   csv_reader_reset(&sRdr);
538   if( CSV_SCHEMA==0 ){
539     char *zSep = "";
540     CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
541     if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
542     for(i=0; i<pNew->nCol; i++){
543       CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
544       zSep = ",";
545     }
546     CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
547   }
548   rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
549   if( rc ) goto csvtab_connect_error;
550   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
551     sqlite3_free(azPValue[i]);
552   }
553   return SQLITE_OK;
554 
555 csvtab_connect_oom:
556   rc = SQLITE_NOMEM;
557   csv_errmsg(&sRdr, "out of memory");
558 
559 csvtab_connect_error:
560   if( pNew ) csvtabDisconnect(&pNew->base);
561   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
562     sqlite3_free(azPValue[i]);
563   }
564   if( sRdr.zErr[0] ){
565     sqlite3_free(*pzErr);
566     *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
567   }
568   csv_reader_reset(&sRdr);
569   if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
570   return rc;
571 }
572 
573 /*
574 ** Reset the current row content held by a CsvCursor.
575 */
576 static void csvtabCursorRowReset(CsvCursor *pCur){
577   CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
578   int i;
579   for(i=0; i<pTab->nCol; i++){
580     sqlite3_free(pCur->azVal[i]);
581     pCur->azVal[i] = 0;
582     pCur->aLen[i] = 0;
583   }
584 }
585 
586 /*
587 ** The xConnect and xCreate methods do the same thing, but they must be
588 ** different so that the virtual table is not an eponymous virtual table.
589 */
590 static int csvtabCreate(
591   sqlite3 *db,
592   void *pAux,
593   int argc, const char *const*argv,
594   sqlite3_vtab **ppVtab,
595   char **pzErr
596 ){
597  return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
598 }
599 
600 /*
601 ** Destructor for a CsvCursor.
602 */
603 static int csvtabClose(sqlite3_vtab_cursor *cur){
604   CsvCursor *pCur = (CsvCursor*)cur;
605   csvtabCursorRowReset(pCur);
606   csv_reader_reset(&pCur->rdr);
607   sqlite3_free(cur);
608   return SQLITE_OK;
609 }
610 
611 /*
612 ** Constructor for a new CsvTable cursor object.
613 */
614 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
615   CsvTable *pTab = (CsvTable*)p;
616   CsvCursor *pCur;
617   size_t nByte;
618   nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
619   pCur = sqlite3_malloc( nByte );
620   if( pCur==0 ) return SQLITE_NOMEM;
621   memset(pCur, 0, nByte);
622   pCur->azVal = (char**)&pCur[1];
623   pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
624   *ppCursor = &pCur->base;
625   if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
626     csv_xfer_error(pTab, &pCur->rdr);
627     return SQLITE_ERROR;
628   }
629   return SQLITE_OK;
630 }
631 
632 
633 /*
634 ** Advance a CsvCursor to its next row of input.
635 ** Set the EOF marker if we reach the end of input.
636 */
637 static int csvtabNext(sqlite3_vtab_cursor *cur){
638   CsvCursor *pCur = (CsvCursor*)cur;
639   CsvTable *pTab = (CsvTable*)cur->pVtab;
640   int i = 0;
641   char *z;
642   do{
643     z = csv_read_one_field(&pCur->rdr);
644     if( z==0 ){
645       csv_xfer_error(pTab, &pCur->rdr);
646       break;
647     }
648     if( i<pTab->nCol ){
649       if( pCur->aLen[i] < pCur->rdr.n+1 ){
650         char *zNew = sqlite3_realloc(pCur->azVal[i], pCur->rdr.n+1);
651         if( zNew==0 ){
652           csv_errmsg(&pCur->rdr, "out of memory");
653           csv_xfer_error(pTab, &pCur->rdr);
654           break;
655         }
656         pCur->azVal[i] = zNew;
657         pCur->aLen[i] = pCur->rdr.n+1;
658       }
659       memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
660       i++;
661     }
662   }while( pCur->rdr.cTerm==',' );
663   while( i<pTab->nCol ){
664     sqlite3_free(pCur->azVal[i]);
665     pCur->azVal[i] = 0;
666     pCur->aLen[i] = 0;
667     i++;
668   }
669   if( z==0 || pCur->rdr.cTerm==EOF ){
670     pCur->iRowid = -1;
671   }else{
672     pCur->iRowid++;
673   }
674   return SQLITE_OK;
675 }
676 
677 /*
678 ** Return values of columns for the row at which the CsvCursor
679 ** is currently pointing.
680 */
681 static int csvtabColumn(
682   sqlite3_vtab_cursor *cur,   /* The cursor */
683   sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
684   int i                       /* Which column to return */
685 ){
686   CsvCursor *pCur = (CsvCursor*)cur;
687   CsvTable *pTab = (CsvTable*)cur->pVtab;
688   if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
689     sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
690   }
691   return SQLITE_OK;
692 }
693 
694 /*
695 ** Return the rowid for the current row.
696 */
697 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
698   CsvCursor *pCur = (CsvCursor*)cur;
699   *pRowid = pCur->iRowid;
700   return SQLITE_OK;
701 }
702 
703 /*
704 ** Return TRUE if the cursor has been moved off of the last
705 ** row of output.
706 */
707 static int csvtabEof(sqlite3_vtab_cursor *cur){
708   CsvCursor *pCur = (CsvCursor*)cur;
709   return pCur->iRowid<0;
710 }
711 
712 /*
713 ** Only a full table scan is supported.  So xFilter simply rewinds to
714 ** the beginning.
715 */
716 static int csvtabFilter(
717   sqlite3_vtab_cursor *pVtabCursor,
718   int idxNum, const char *idxStr,
719   int argc, sqlite3_value **argv
720 ){
721   CsvCursor *pCur = (CsvCursor*)pVtabCursor;
722   CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
723   pCur->iRowid = 0;
724   if( pCur->rdr.in==0 ){
725     assert( pCur->rdr.zIn==pTab->zData );
726     assert( pTab->iStart<=pCur->rdr.nIn );
727     pCur->rdr.iIn = pTab->iStart;
728   }else{
729     fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
730     pCur->rdr.iIn = 0;
731     pCur->rdr.nIn = 0;
732   }
733   return csvtabNext(pVtabCursor);
734 }
735 
736 /*
737 ** Only a forward full table scan is supported.  xBestIndex is mostly
738 ** a no-op.  If CSVTEST_FIDX is set, then the presence of equality
739 ** constraints lowers the estimated cost, which is fiction, but is useful
740 ** for testing certain kinds of virtual table behavior.
741 */
742 static int csvtabBestIndex(
743   sqlite3_vtab *tab,
744   sqlite3_index_info *pIdxInfo
745 ){
746   pIdxInfo->estimatedCost = 1000000;
747 #ifdef SQLITE_TEST
748   if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
749     /* The usual (and sensible) case is to always do a full table scan.
750     ** The code in this branch only runs when testflags=1.  This code
751     ** generates an artifical and unrealistic plan which is useful
752     ** for testing virtual table logic but is not helpful to real applications.
753     **
754     ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
755     ** table (even though it is not) and the cost of running the virtual table
756     ** is reduced from 1 million to just 10.  The constraints are *not* marked
757     ** as omittable, however, so the query planner should still generate a
758     ** plan that gives a correct answer, even if they plan is not optimal.
759     */
760     int i;
761     int nConst = 0;
762     for(i=0; i<pIdxInfo->nConstraint; i++){
763       unsigned char op;
764       if( pIdxInfo->aConstraint[i].usable==0 ) continue;
765       op = pIdxInfo->aConstraint[i].op;
766       if( op==SQLITE_INDEX_CONSTRAINT_EQ
767        || op==SQLITE_INDEX_CONSTRAINT_LIKE
768        || op==SQLITE_INDEX_CONSTRAINT_GLOB
769       ){
770         pIdxInfo->estimatedCost = 10;
771         pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
772         nConst++;
773       }
774     }
775   }
776 #endif
777   return SQLITE_OK;
778 }
779 
780 
/* Method table for the read-only CSV virtual table.  sqlite3_csv_init()
** registers it under the module name "csv".  xCreate and xConnect are
** separate functions, xDisconnect and xDestroy share csvtabDisconnect,
** and every slot from xUpdate onward is left empty. */
static sqlite3_module CsvModule = {
  0,                       /* iVersion */
  csvtabCreate,            /* xCreate */
  csvtabConnect,           /* xConnect */
  csvtabBestIndex,         /* xBestIndex */
  csvtabDisconnect,        /* xDisconnect */
  csvtabDisconnect,        /* xDestroy */
  csvtabOpen,              /* xOpen - open a cursor */
  csvtabClose,             /* xClose - close a cursor */
  csvtabFilter,            /* xFilter - configure scan constraints */
  csvtabNext,              /* xNext - advance a cursor */
  csvtabEof,               /* xEof - check for end of scan */
  csvtabColumn,            /* xColumn - read data */
  csvtabRowid,             /* xRowid - read data */
  0,                       /* xUpdate */
  0,                       /* xBegin */
  0,                       /* xSync */
  0,                       /* xCommit */
  0,                       /* xRollback */
  0,                       /* xFindMethod */
  0,                       /* xRename */
};
803 
#ifdef SQLITE_TEST
/*
** For virtual table testing, make a version of the CSV virtual table
** available that has an xUpdate function.  But the xUpdate always returns
** SQLITE_READONLY since the CSV file is not really writable.
**
** All arguments of csvtabUpdate() are ignored.
*/
static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
  return SQLITE_READONLY;
}
/* Same method table as CsvModule except that the xUpdate slot is filled
** in with csvtabUpdate.  sqlite3_csv_init() registers it under the
** module name "csv_wr". */
static sqlite3_module CsvModuleFauxWrite = {
  0,                       /* iVersion */
  csvtabCreate,            /* xCreate */
  csvtabConnect,           /* xConnect */
  csvtabBestIndex,         /* xBestIndex */
  csvtabDisconnect,        /* xDisconnect */
  csvtabDisconnect,        /* xDestroy */
  csvtabOpen,              /* xOpen - open a cursor */
  csvtabClose,             /* xClose - close a cursor */
  csvtabFilter,            /* xFilter - configure scan constraints */
  csvtabNext,              /* xNext - advance a cursor */
  csvtabEof,               /* xEof - check for end of scan */
  csvtabColumn,            /* xColumn - read data */
  csvtabRowid,             /* xRowid - read data */
  csvtabUpdate,            /* xUpdate */
  0,                       /* xBegin */
  0,                       /* xSync */
  0,                       /* xCommit */
  0,                       /* xRollback */
  0,                       /* xFindMethod */
  0,                       /* xRename */
};
#endif /* SQLITE_TEST */
836 
837 
838 
839 #ifdef _WIN32
840 __declspec(dllexport)
841 #endif
842 /*
843 ** This routine is called when the extension is loaded.  The new
844 ** CSV virtual table module is registered with the calling database
845 ** connection.
846 */
847 int sqlite3_csv_init(
848   sqlite3 *db,
849   char **pzErrMsg,
850   const sqlite3_api_routines *pApi
851 ){
852   int rc;
853   SQLITE_EXTENSION_INIT2(pApi);
854   rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
855 #ifdef SQLITE_TEST
856   if( rc==SQLITE_OK ){
857     rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
858   }
859 #endif
860   return rc;
861 }
862