xref: /sqlite-3.40.0/ext/misc/csv.c (revision dfe4e6bb)
1 /*
2 ** 2016-05-28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains the implementation of an SQLite virtual table for
14 ** reading CSV files.
15 **
16 ** Usage:
17 **
18 **    .load ./csv
19 **    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20 **    SELECT * FROM csv;
21 **
22 ** The columns are named "c0", "c1", "c2", ... by default.  But the
23 ** application can define its own CREATE TABLE statement as an additional
24 ** parameter.  For example:
25 **
26 **    CREATE VIRTUAL TABLE temp.csv2 USING csv(
27 **       filename = "../http.log",
28 **       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29 **    );
30 **
31 ** Instead of specifying a file, the text of the CSV can be loaded using
32 ** the data= parameter.
33 **
34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have
35 ** N columns.  If the columns parameter is omitted, the CSV file is opened
36 ** as soon as the virtual table is constructed and the first row of the CSV
37 ** is read in order to count the columns.
38 **
39 ** Some extra debugging features (used for testing virtual tables) are available
40 ** if this module is compiled with -DSQLITE_TEST.
41 */
42 #include <sqlite3ext.h>
43 SQLITE_EXTENSION_INIT1
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <stdarg.h>
48 #include <ctype.h>
49 #include <stdio.h>
50 
51 #ifndef SQLITE_OMIT_VIRTUALTABLE
52 
/*
** CSV_NOINLINE marks a function that the compiler should keep out of
** line.  It expands to the GCC-style attribute or to the MSVC declspec
** when one of those compilers is detected, and to nothing otherwise.
*/
#ifdef __GNUC__
# define CSV_NOINLINE  __attribute__((noinline))
#else
# if defined(_MSC_VER) && _MSC_VER>=1310
#  define CSV_NOINLINE  __declspec(noinline)
# else
#  define CSV_NOINLINE
# endif
#endif

/* Size in bytes of the CsvReader.zErr[] error message buffer */
#define CSV_MXERR 200

/* Size in bytes of the buffer allocated when reading CSV from a file */
#define CSV_INBUFSZ 1024
71 
72 /* A context object used when read a CSV file. */
73 typedef struct CsvReader CsvReader;
74 struct CsvReader {
75   FILE *in;              /* Read the CSV text from this input stream */
76   char *z;               /* Accumulated text for a field */
77   int n;                 /* Number of bytes in z */
78   int nAlloc;            /* Space allocated for z[] */
79   int nLine;             /* Current line number */
80   char cTerm;            /* Character that terminated the most recent field */
81   size_t iIn;            /* Next unread character in the input buffer */
82   size_t nIn;            /* Number of characters in the input buffer */
83   char *zIn;             /* The input buffer */
84   char zErr[CSV_MXERR];  /* Error message */
85 };
86 
87 /* Initialize a CsvReader object */
88 static void csv_reader_init(CsvReader *p){
89   p->in = 0;
90   p->z = 0;
91   p->n = 0;
92   p->nAlloc = 0;
93   p->nLine = 0;
94   p->nIn = 0;
95   p->zIn = 0;
96   p->zErr[0] = 0;
97 }
98 
99 /* Close and reset a CsvReader object */
100 static void csv_reader_reset(CsvReader *p){
101   if( p->in ){
102     fclose(p->in);
103     sqlite3_free(p->zIn);
104   }
105   sqlite3_free(p->z);
106   csv_reader_init(p);
107 }
108 
109 /* Report an error on a CsvReader */
110 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
111   va_list ap;
112   va_start(ap, zFormat);
113   sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
114   va_end(ap);
115 }
116 
117 /* Open the file associated with a CsvReader
118 ** Return the number of errors.
119 */
120 static int csv_reader_open(
121   CsvReader *p,               /* The reader to open */
122   const char *zFilename,      /* Read from this filename */
123   const char *zData           /*  ... or use this data */
124 ){
125   if( zFilename ){
126     p->zIn = sqlite3_malloc( CSV_INBUFSZ );
127     if( p->zIn==0 ){
128       csv_errmsg(p, "out of memory");
129       return 1;
130     }
131     p->in = fopen(zFilename, "rb");
132     if( p->in==0 ){
133       csv_reader_reset(p);
134       csv_errmsg(p, "cannot open '%s' for reading", zFilename);
135       return 1;
136     }
137   }else{
138     assert( p->in==0 );
139     p->zIn = (char*)zData;
140     p->nIn = strlen(zData);
141   }
142   return 0;
143 }
144 
145 /* The input buffer has overflowed.  Refill the input buffer, then
146 ** return the next character
147 */
148 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
149   size_t got;
150 
151   assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
152   assert( p->in!=0 );        /* Only called if reading froma file */
153 
154   got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
155   if( got==0 ) return EOF;
156   p->nIn = got;
157   p->iIn = 1;
158   return p->zIn[0];
159 }
160 
161 /* Return the next character of input.  Return EOF at end of input. */
162 static int csv_getc(CsvReader *p){
163   if( p->iIn >= p->nIn ){
164     if( p->in!=0 ) return csv_getc_refill(p);
165     return EOF;
166   }
167   return p->zIn[p->iIn++];
168 }
169 
170 /* Increase the size of p->z and append character c to the end.
171 ** Return 0 on success and non-zero if there is an OOM error */
172 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
173   char *zNew;
174   int nNew = p->nAlloc*2 + 100;
175   zNew = sqlite3_realloc64(p->z, nNew);
176   if( zNew ){
177     p->z = zNew;
178     p->nAlloc = nNew;
179     p->z[p->n++] = c;
180     return 0;
181   }else{
182     csv_errmsg(p, "out of memory");
183     return 1;
184   }
185 }
186 
187 /* Append a single character to the CsvReader.z[] array.
188 ** Return 0 on success and non-zero if there is an OOM error */
189 static int csv_append(CsvReader *p, char c){
190   if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
191   p->z[p->n++] = c;
192   return 0;
193 }
194 
195 /* Read a single field of CSV text.  Compatible with rfc4180 and extended
196 ** with the option of having a separator other than ",".
197 **
198 **   +  Input comes from p->in.
199 **   +  Store results in p->z of length p->n.  Space to hold p->z comes
200 **      from sqlite3_malloc64().
201 **   +  Keep track of the line number in p->nLine.
202 **   +  Store the character that terminates the field in p->cTerm.  Store
203 **      EOF on end-of-file.
204 **
205 ** Return "" at EOF.  Return 0 on an OOM error.
206 */
207 static char *csv_read_one_field(CsvReader *p){
208   int c;
209   p->n = 0;
210   c = csv_getc(p);
211   if( c==EOF ){
212     p->cTerm = EOF;
213     return "";
214   }
215   if( c=='"' ){
216     int pc, ppc;
217     int startLine = p->nLine;
218     pc = ppc = 0;
219     while( 1 ){
220       c = csv_getc(p);
221       if( c<='"' || pc=='"' ){
222         if( c=='\n' ) p->nLine++;
223         if( c=='"' ){
224           if( pc=='"' ){
225             pc = 0;
226             continue;
227           }
228         }
229         if( (c==',' && pc=='"')
230          || (c=='\n' && pc=='"')
231          || (c=='\n' && pc=='\r' && ppc=='"')
232          || (c==EOF && pc=='"')
233         ){
234           do{ p->n--; }while( p->z[p->n]!='"' );
235           p->cTerm = (char)c;
236           break;
237         }
238         if( pc=='"' && c!='\r' ){
239           csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
240           break;
241         }
242         if( c==EOF ){
243           csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
244                      startLine, '"');
245           p->cTerm = (char)c;
246           break;
247         }
248       }
249       if( csv_append(p, (char)c) ) return 0;
250       ppc = pc;
251       pc = c;
252     }
253   }else{
254     while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
255       if( csv_append(p, (char)c) ) return 0;
256       c = csv_getc(p);
257     }
258     if( c=='\n' ){
259       p->nLine++;
260       if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
261     }
262     p->cTerm = (char)c;
263   }
264   if( p->z ) p->z[p->n] = 0;
265   return p->z;
266 }
267 
268 
269 /* Forward references to the various virtual table methods implemented
270 ** in this file. */
271 static int csvtabCreate(sqlite3*, void*, int, const char*const*,
272                            sqlite3_vtab**,char**);
273 static int csvtabConnect(sqlite3*, void*, int, const char*const*,
274                            sqlite3_vtab**,char**);
275 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
276 static int csvtabDisconnect(sqlite3_vtab*);
277 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
278 static int csvtabClose(sqlite3_vtab_cursor*);
279 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
280                           int argc, sqlite3_value **argv);
281 static int csvtabNext(sqlite3_vtab_cursor*);
282 static int csvtabEof(sqlite3_vtab_cursor*);
283 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
284 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
285 
286 /* An instance of the CSV virtual table */
287 typedef struct CsvTable {
288   sqlite3_vtab base;              /* Base class.  Must be first */
289   char *zFilename;                /* Name of the CSV file */
290   char *zData;                    /* Raw CSV data in lieu of zFilename */
291   long iStart;                    /* Offset to start of data in zFilename */
292   int nCol;                       /* Number of columns in the CSV file */
293   unsigned int tstFlags;          /* Bit values used for testing */
294 } CsvTable;
295 
296 /* Allowed values for tstFlags */
297 #define CSVTEST_FIDX  0x0001      /* Pretend that constrained searchs cost less*/
298 
299 /* A cursor for the CSV virtual table */
300 typedef struct CsvCursor {
301   sqlite3_vtab_cursor base;       /* Base class.  Must be first */
302   CsvReader rdr;                  /* The CsvReader object */
303   char **azVal;                   /* Value of the current row */
304   int *aLen;                      /* Length of each entry */
305   sqlite3_int64 iRowid;           /* The current rowid.  Negative for EOF */
306 } CsvCursor;
307 
308 /* Transfer error message text from a reader into a CsvTable */
309 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
310   sqlite3_free(pTab->base.zErrMsg);
311   pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
312 }
313 
314 /*
315 ** This method is the destructor fo a CsvTable object.
316 */
317 static int csvtabDisconnect(sqlite3_vtab *pVtab){
318   CsvTable *p = (CsvTable*)pVtab;
319   sqlite3_free(p->zFilename);
320   sqlite3_free(p->zData);
321   sqlite3_free(p);
322   return SQLITE_OK;
323 }
324 
/*
** Return a pointer to the first character of z[] that is not whitespace
** according to isspace().  If z[] holds only whitespace, the pointer
** returned is to its zero terminator.
*/
static const char *csv_skip_whitespace(const char *z){
  for(; isspace((unsigned char)*z); z++){}
  return z;
}
331 
/*
** Remove trailing whitespace from string z[], in place.
**
** The character examined is z[n-1], the last character of the string.
** (Looking at z[n] would only ever see the zero terminator, so nothing
** would be trimmed.)
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
338 
/*
** Remove quoting from string z[], in place.
**
** Nothing happens unless z[] both starts and ends with the same quote
** character (either ' or ") and is at least two characters long.  The
** outer quotes are dropped and each doubled quote character inside is
** collapsed into a single one.
*/
static void csv_dequote(char *z){
  char q = z[0];
  size_t nByte, iRead, iWrite;

  if( q!='\'' && q!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=q ) return;
  iWrite = 0;
  iRead = 1;
  while( iRead<nByte-1 ){
    /* On a doubled quote, skip the first and copy the second */
    if( z[iRead]==q && z[iRead+1]==q ) iRead++;
    z[iWrite++] = z[iRead++];
  }
  z[iWrite] = 0;
}
354 
/*
** Test whether z[] has the form "TAG = VALUE", where TAG is the first
** nTag bytes of zTag and whitespace is allowed before TAG and on either
** side of the "=".
**
** Return a pointer to the first character of VALUE on a match, or NULL
** if z[] does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(zCur+nTag);
    if( zCur[0]=='=' ) return csv_skip_whitespace(zCur+1);
  }
  return 0;
}
366 
367 /* Decode a parameter that requires a dequoted string.
368 **
369 ** Return 1 if the parameter is seen, or 0 if not.  1 is returned
370 ** even if there is an error.  If an error occurs, then an error message
371 ** is left in p->zErr.  If there are no errors, p->zErr[0]==0.
372 */
373 static int csv_string_parameter(
374   CsvReader *p,            /* Leave the error message here, if there is one */
375   const char *zParam,      /* Parameter we are checking for */
376   const char *zArg,        /* Raw text of the virtual table argment */
377   char **pzVal             /* Write the dequoted string value here */
378 ){
379   const char *zValue;
380   zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
381   if( zValue==0 ) return 0;
382   p->zErr[0] = 0;
383   if( *pzVal ){
384     csv_errmsg(p, "more than one '%s' parameter", zParam);
385     return 1;
386   }
387   *pzVal = sqlite3_mprintf("%s", zValue);
388   if( *pzVal==0 ){
389     csv_errmsg(p, "out of memory");
390     return 1;
391   }
392   csv_trim_whitespace(*pzVal);
393   csv_dequote(*pzVal);
394   return 1;
395 }
396 
397 
/*
** Interpret z[] as a boolean.
**
** Return 1 for "yes", "on", "true" (compared case-insensitively) or "1".
** Return 0 for "no", "off", "false" (compared case-insensitively) or
** "0".  Return -1 for anything else.
*/
static int csv_boolean(const char *z){
  if( sqlite3_stricmp("yes",z)==0
   || sqlite3_stricmp("on",z)==0
   || sqlite3_stricmp("true",z)==0
   || (z[0]=='1' && z[1]==0)      /* exactly the one-character string "1" */
  ){
    return 1;
  }
  if( sqlite3_stricmp("no",z)==0
   || sqlite3_stricmp("off",z)==0
   || sqlite3_stricmp("false",z)==0
   || (z[0]=='0' && z[1]==0)      /* exactly the one-character string "0" */
  ){
    return 0;
  }
  return -1;
}
418 
419 
420 /*
421 ** Parameters:
422 **    filename=FILENAME          Name of file containing CSV content
423 **    data=TEXT                  Direct CSV content.
424 **    schema=SCHEMA              Alternative CSV schema.
425 **    header=YES|NO              First row of CSV defines the names of
426 **                               columns if "yes".  Default "no".
427 **    columns=N                  Assume the CSV file contains N columns.
428 **
429 ** Only available if compiled with SQLITE_TEST:
430 **
431 **    testflags=N                Bitmask of test flags.  Optional
432 **
433 ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
434 ** and so forth.  If columns=N is omitted, then the file is opened and
435 ** the number of columns in the first row is counted to determine the
436 ** column count.  If header=YES, then the first row is skipped.
437 */
438 static int csvtabConnect(
439   sqlite3 *db,
440   void *pAux,
441   int argc, const char *const*argv,
442   sqlite3_vtab **ppVtab,
443   char **pzErr
444 ){
445   CsvTable *pNew = 0;        /* The CsvTable object to construct */
446   int bHeader = -1;          /* header= flags.  -1 means not seen yet */
447   int rc = SQLITE_OK;        /* Result code from this routine */
448   int i, j;                  /* Loop counters */
449 #ifdef SQLITE_TEST
450   int tstFlags = 0;          /* Value for testflags=N parameter */
451 #endif
452   int nCol = -99;            /* Value of the columns= parameter */
453   CsvReader sRdr;            /* A CSV file reader used to store an error
454                              ** message and/or to count the number of columns */
455   static const char *azParam[] = {
456      "filename", "data", "schema",
457   };
458   char *azPValue[3];         /* Parameter values */
459 # define CSV_FILENAME (azPValue[0])
460 # define CSV_DATA     (azPValue[1])
461 # define CSV_SCHEMA   (azPValue[2])
462 
463 
464   assert( sizeof(azPValue)==sizeof(azParam) );
465   memset(&sRdr, 0, sizeof(sRdr));
466   memset(azPValue, 0, sizeof(azPValue));
467   for(i=3; i<argc; i++){
468     const char *z = argv[i];
469     const char *zValue;
470     for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
471       if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
472     }
473     if( j<sizeof(azParam)/sizeof(azParam[0]) ){
474       if( sRdr.zErr[0] ) goto csvtab_connect_error;
475     }else
476     if( (zValue = csv_parameter("header",6,z))!=0 ){
477       int x;
478       if( bHeader>=0 ){
479         csv_errmsg(&sRdr, "more than one 'header' parameter");
480         goto csvtab_connect_error;
481       }
482       x = csv_boolean(zValue);
483       if( x==1 ){
484         bHeader = 1;
485       }else if( x==0 ){
486         bHeader = 0;
487       }else{
488         csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
489         goto csvtab_connect_error;
490       }
491     }else
492 #ifdef SQLITE_TEST
493     if( (zValue = csv_parameter("testflags",9,z))!=0 ){
494       tstFlags = (unsigned int)atoi(zValue);
495     }else
496 #endif
497     if( (zValue = csv_parameter("columns",7,z))!=0 ){
498       if( nCol>0 ){
499         csv_errmsg(&sRdr, "more than one 'columns' parameter");
500         goto csvtab_connect_error;
501       }
502       nCol = atoi(zValue);
503       if( nCol<=0 ){
504         csv_errmsg(&sRdr, "must have at least one column");
505         goto csvtab_connect_error;
506       }
507     }else
508     {
509       csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
510       goto csvtab_connect_error;
511     }
512   }
513   if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
514     csv_errmsg(&sRdr, "must either filename= or data= but not both");
515     goto csvtab_connect_error;
516   }
517   if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
518     goto csvtab_connect_error;
519   }
520   pNew = sqlite3_malloc( sizeof(*pNew) );
521   *ppVtab = (sqlite3_vtab*)pNew;
522   if( pNew==0 ) goto csvtab_connect_oom;
523   memset(pNew, 0, sizeof(*pNew));
524   if( nCol>0 ){
525     pNew->nCol = nCol;
526   }else{
527     do{
528       const char *z = csv_read_one_field(&sRdr);
529       if( z==0 ) goto csvtab_connect_oom;
530       pNew->nCol++;
531     }while( sRdr.cTerm==',' );
532   }
533   pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
534   pNew->zData = CSV_DATA;          CSV_DATA = 0;
535 #ifdef SQLITE_TEST
536   pNew->tstFlags = tstFlags;
537 #endif
538   pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
539   csv_reader_reset(&sRdr);
540   if( CSV_SCHEMA==0 ){
541     char *zSep = "";
542     CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
543     if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
544     for(i=0; i<pNew->nCol; i++){
545       CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
546       zSep = ",";
547     }
548     CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
549   }
550   rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
551   if( rc ) goto csvtab_connect_error;
552   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
553     sqlite3_free(azPValue[i]);
554   }
555   return SQLITE_OK;
556 
557 csvtab_connect_oom:
558   rc = SQLITE_NOMEM;
559   csv_errmsg(&sRdr, "out of memory");
560 
561 csvtab_connect_error:
562   if( pNew ) csvtabDisconnect(&pNew->base);
563   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
564     sqlite3_free(azPValue[i]);
565   }
566   if( sRdr.zErr[0] ){
567     sqlite3_free(*pzErr);
568     *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
569   }
570   csv_reader_reset(&sRdr);
571   if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
572   return rc;
573 }
574 
575 /*
576 ** Reset the current row content held by a CsvCursor.
577 */
578 static void csvtabCursorRowReset(CsvCursor *pCur){
579   CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
580   int i;
581   for(i=0; i<pTab->nCol; i++){
582     sqlite3_free(pCur->azVal[i]);
583     pCur->azVal[i] = 0;
584     pCur->aLen[i] = 0;
585   }
586 }
587 
588 /*
589 ** The xConnect and xCreate methods do the same thing, but they must be
590 ** different so that the virtual table is not an eponymous virtual table.
591 */
592 static int csvtabCreate(
593   sqlite3 *db,
594   void *pAux,
595   int argc, const char *const*argv,
596   sqlite3_vtab **ppVtab,
597   char **pzErr
598 ){
599  return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
600 }
601 
602 /*
603 ** Destructor for a CsvCursor.
604 */
605 static int csvtabClose(sqlite3_vtab_cursor *cur){
606   CsvCursor *pCur = (CsvCursor*)cur;
607   csvtabCursorRowReset(pCur);
608   csv_reader_reset(&pCur->rdr);
609   sqlite3_free(cur);
610   return SQLITE_OK;
611 }
612 
613 /*
614 ** Constructor for a new CsvTable cursor object.
615 */
616 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
617   CsvTable *pTab = (CsvTable*)p;
618   CsvCursor *pCur;
619   size_t nByte;
620   nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
621   pCur = sqlite3_malloc64( nByte );
622   if( pCur==0 ) return SQLITE_NOMEM;
623   memset(pCur, 0, nByte);
624   pCur->azVal = (char**)&pCur[1];
625   pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
626   *ppCursor = &pCur->base;
627   if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
628     csv_xfer_error(pTab, &pCur->rdr);
629     return SQLITE_ERROR;
630   }
631   return SQLITE_OK;
632 }
633 
634 
635 /*
636 ** Advance a CsvCursor to its next row of input.
637 ** Set the EOF marker if we reach the end of input.
638 */
639 static int csvtabNext(sqlite3_vtab_cursor *cur){
640   CsvCursor *pCur = (CsvCursor*)cur;
641   CsvTable *pTab = (CsvTable*)cur->pVtab;
642   int i = 0;
643   char *z;
644   do{
645     z = csv_read_one_field(&pCur->rdr);
646     if( z==0 ){
647       csv_xfer_error(pTab, &pCur->rdr);
648       break;
649     }
650     if( i<pTab->nCol ){
651       if( pCur->aLen[i] < pCur->rdr.n+1 ){
652         char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
653         if( zNew==0 ){
654           csv_errmsg(&pCur->rdr, "out of memory");
655           csv_xfer_error(pTab, &pCur->rdr);
656           break;
657         }
658         pCur->azVal[i] = zNew;
659         pCur->aLen[i] = pCur->rdr.n+1;
660       }
661       memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
662       i++;
663     }
664   }while( pCur->rdr.cTerm==',' );
665   while( i<pTab->nCol ){
666     sqlite3_free(pCur->azVal[i]);
667     pCur->azVal[i] = 0;
668     pCur->aLen[i] = 0;
669     i++;
670   }
671   if( z==0 || pCur->rdr.cTerm==EOF ){
672     pCur->iRowid = -1;
673   }else{
674     pCur->iRowid++;
675   }
676   return SQLITE_OK;
677 }
678 
679 /*
680 ** Return values of columns for the row at which the CsvCursor
681 ** is currently pointing.
682 */
683 static int csvtabColumn(
684   sqlite3_vtab_cursor *cur,   /* The cursor */
685   sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
686   int i                       /* Which column to return */
687 ){
688   CsvCursor *pCur = (CsvCursor*)cur;
689   CsvTable *pTab = (CsvTable*)cur->pVtab;
690   if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
691     sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
692   }
693   return SQLITE_OK;
694 }
695 
696 /*
697 ** Return the rowid for the current row.
698 */
699 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
700   CsvCursor *pCur = (CsvCursor*)cur;
701   *pRowid = pCur->iRowid;
702   return SQLITE_OK;
703 }
704 
705 /*
706 ** Return TRUE if the cursor has been moved off of the last
707 ** row of output.
708 */
709 static int csvtabEof(sqlite3_vtab_cursor *cur){
710   CsvCursor *pCur = (CsvCursor*)cur;
711   return pCur->iRowid<0;
712 }
713 
714 /*
715 ** Only a full table scan is supported.  So xFilter simply rewinds to
716 ** the beginning.
717 */
718 static int csvtabFilter(
719   sqlite3_vtab_cursor *pVtabCursor,
720   int idxNum, const char *idxStr,
721   int argc, sqlite3_value **argv
722 ){
723   CsvCursor *pCur = (CsvCursor*)pVtabCursor;
724   CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
725   pCur->iRowid = 0;
726   if( pCur->rdr.in==0 ){
727     assert( pCur->rdr.zIn==pTab->zData );
728     assert( pTab->iStart>=0 );
729     assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
730     pCur->rdr.iIn = pTab->iStart;
731   }else{
732     fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
733     pCur->rdr.iIn = 0;
734     pCur->rdr.nIn = 0;
735   }
736   return csvtabNext(pVtabCursor);
737 }
738 
739 /*
740 ** Only a forward full table scan is supported.  xBestIndex is mostly
741 ** a no-op.  If CSVTEST_FIDX is set, then the presence of equality
742 ** constraints lowers the estimated cost, which is fiction, but is useful
743 ** for testing certain kinds of virtual table behavior.
744 */
745 static int csvtabBestIndex(
746   sqlite3_vtab *tab,
747   sqlite3_index_info *pIdxInfo
748 ){
749   pIdxInfo->estimatedCost = 1000000;
750 #ifdef SQLITE_TEST
751   if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
752     /* The usual (and sensible) case is to always do a full table scan.
753     ** The code in this branch only runs when testflags=1.  This code
754     ** generates an artifical and unrealistic plan which is useful
755     ** for testing virtual table logic but is not helpful to real applications.
756     **
757     ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
758     ** table (even though it is not) and the cost of running the virtual table
759     ** is reduced from 1 million to just 10.  The constraints are *not* marked
760     ** as omittable, however, so the query planner should still generate a
761     ** plan that gives a correct answer, even if they plan is not optimal.
762     */
763     int i;
764     int nConst = 0;
765     for(i=0; i<pIdxInfo->nConstraint; i++){
766       unsigned char op;
767       if( pIdxInfo->aConstraint[i].usable==0 ) continue;
768       op = pIdxInfo->aConstraint[i].op;
769       if( op==SQLITE_INDEX_CONSTRAINT_EQ
770        || op==SQLITE_INDEX_CONSTRAINT_LIKE
771        || op==SQLITE_INDEX_CONSTRAINT_GLOB
772       ){
773         pIdxInfo->estimatedCost = 10;
774         pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
775         nConst++;
776       }
777     }
778   }
779 #endif
780   return SQLITE_OK;
781 }
782 
783 
784 static sqlite3_module CsvModule = {
785   0,                       /* iVersion */
786   csvtabCreate,            /* xCreate */
787   csvtabConnect,           /* xConnect */
788   csvtabBestIndex,         /* xBestIndex */
789   csvtabDisconnect,        /* xDisconnect */
790   csvtabDisconnect,        /* xDestroy */
791   csvtabOpen,              /* xOpen - open a cursor */
792   csvtabClose,             /* xClose - close a cursor */
793   csvtabFilter,            /* xFilter - configure scan constraints */
794   csvtabNext,              /* xNext - advance a cursor */
795   csvtabEof,               /* xEof - check for end of scan */
796   csvtabColumn,            /* xColumn - read data */
797   csvtabRowid,             /* xRowid - read data */
798   0,                       /* xUpdate */
799   0,                       /* xBegin */
800   0,                       /* xSync */
801   0,                       /* xCommit */
802   0,                       /* xRollback */
803   0,                       /* xFindMethod */
804   0,                       /* xRename */
805 };
806 
807 #ifdef SQLITE_TEST
808 /*
809 ** For virtual table testing, make a version of the CSV virtual table
810 ** available that has an xUpdate function.  But the xUpdate always returns
811 ** SQLITE_READONLY since the CSV file is not really writable.
812 */
813 static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
814   return SQLITE_READONLY;
815 }
816 static sqlite3_module CsvModuleFauxWrite = {
817   0,                       /* iVersion */
818   csvtabCreate,            /* xCreate */
819   csvtabConnect,           /* xConnect */
820   csvtabBestIndex,         /* xBestIndex */
821   csvtabDisconnect,        /* xDisconnect */
822   csvtabDisconnect,        /* xDestroy */
823   csvtabOpen,              /* xOpen - open a cursor */
824   csvtabClose,             /* xClose - close a cursor */
825   csvtabFilter,            /* xFilter - configure scan constraints */
826   csvtabNext,              /* xNext - advance a cursor */
827   csvtabEof,               /* xEof - check for end of scan */
828   csvtabColumn,            /* xColumn - read data */
829   csvtabRowid,             /* xRowid - read data */
830   csvtabUpdate,            /* xUpdate */
831   0,                       /* xBegin */
832   0,                       /* xSync */
833   0,                       /* xCommit */
834   0,                       /* xRollback */
835   0,                       /* xFindMethod */
836   0,                       /* xRename */
837 };
838 #endif /* SQLITE_TEST */
839 
840 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
841 
842 
843 #ifdef _WIN32
844 __declspec(dllexport)
845 #endif
846 /*
847 ** This routine is called when the extension is loaded.  The new
848 ** CSV virtual table module is registered with the calling database
849 ** connection.
850 */
851 int sqlite3_csv_init(
852   sqlite3 *db,
853   char **pzErrMsg,
854   const sqlite3_api_routines *pApi
855 ){
856 #ifndef SQLITE_OMIT_VIRTUALTABLE
857   int rc;
858   SQLITE_EXTENSION_INIT2(pApi);
859   rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
860 #ifdef SQLITE_TEST
861   if( rc==SQLITE_OK ){
862     rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
863   }
864 #endif
865   return rc;
866 #else
867   return SQLITE_OK;
868 #endif
869 }
870