xref: /sqlite-3.40.0/ext/misc/csv.c (revision 35db31b2)
1 /*
2 ** 2016-05-28
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains the implementation of an SQLite virtual table for
14 ** reading CSV files.
15 **
16 ** Usage:
17 **
18 **    .load ./csv
19 **    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20 **    SELECT * FROM csv;
21 **
** The columns are named "c0", "c1", "c2", ... by default.  But the
23 ** application can define its own CREATE TABLE statement as an additional
24 ** parameter.  For example:
25 **
26 **    CREATE VIRTUAL TABLE temp.csv2 USING csv(
27 **       filename = "../http.log",
28 **       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29 **    );
30 */
31 #include <sqlite3ext.h>
32 SQLITE_EXTENSION_INIT1
33 #include <string.h>
34 #include <stdlib.h>
35 #include <assert.h>
36 #include <stdarg.h>
37 #include <ctype.h>
38 #include <stdio.h>
39 
/*
** CSV_NOINLINE marks a function that the compiler should not inline.
** It expands to the GCC-style attribute when __GNUC__ is defined, to
** the MSVC declspec for _MSC_VER 1310 and later, and to nothing for
** every other compiler.
*/
#if defined(__GNUC__)
#  define CSV_NOINLINE  __attribute__((noinline))
#else
#  if defined(_MSC_VER) && _MSC_VER>=1310
#    define CSV_NOINLINE  __declspec(noinline)
#  else
#    define CSV_NOINLINE
#  endif
#endif
51 
52 
/* Max size of the error message in a CsvReader */
#define CSV_MXERR 200

/* Size of the CsvReader input buffer */
#define CSV_INBUFSZ 1024

/* A context object used when reading CSV text.
**
** Input comes either from a file (in!=0, in which case zIn[] is a buffer
** of CSV_INBUFSZ bytes that is refilled from the file on demand) or from
** an in-memory string (in==0, in which case zIn points directly at the
** caller-supplied text and nIn is its length).
*/
typedef struct CsvReader CsvReader;
struct CsvReader {
  FILE *in;              /* Read the CSV text from this input stream */
  char *z;               /* Accumulated text for a field */
  int n;                 /* Number of bytes in z */
  int nAlloc;            /* Space allocated for z[] */
  int nLine;             /* Current line number */
  int cTerm;             /* Character that terminated the most recent field,
                         ** or EOF.  This is an int rather than a char so
                         ** that EOF compares equal to itself even on
                         ** platforms where plain char is unsigned. */
  size_t iIn;            /* Next unread character in the input buffer */
  size_t nIn;            /* Number of characters in the input buffer */
  char *zIn;             /* The input buffer */
  char zErr[CSV_MXERR];  /* Error message */
};
73 
74 /* Initialize a CsvReader object */
75 static void csv_reader_init(CsvReader *p){
76   p->in = 0;
77   p->z = 0;
78   p->n = 0;
79   p->nAlloc = 0;
80   p->nLine = 0;
81   p->nIn = 0;
82   p->zIn = 0;
83   p->zErr[0] = 0;
84 }
85 
86 /* Close and reset a CsvReader object */
87 static void csv_reader_reset(CsvReader *p){
88   if( p->in ){
89     fclose(p->in);
90     sqlite3_free(p->zIn);
91   }
92   sqlite3_free(p->z);
93   csv_reader_init(p);
94 }
95 
96 /* Report an error on a CsvReader */
97 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
98   va_list ap;
99   va_start(ap, zFormat);
100   sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
101   va_end(ap);
102 }
103 
104 /* Open the file associated with a CsvReader
105 ** Return the number of errors.
106 */
107 static int csv_reader_open(
108   CsvReader *p,               /* The reader to open */
109   const char *zFilename,      /* Read from this filename */
110   const char *zData           /*  ... or use this data */
111 ){
112   if( zFilename ){
113     p->zIn = sqlite3_malloc( CSV_INBUFSZ );
114     if( p->zIn==0 ){
115       csv_errmsg(p, "out of memory");
116       return 1;
117     }
118     p->in = fopen(zFilename, "rb");
119     if( p->in==0 ){
120       csv_reader_reset(p);
121       csv_errmsg(p, "cannot open '%s' for reading", zFilename);
122       return 1;
123     }
124   }else{
125     assert( p->in==0 );
126     p->zIn = (char*)zData;
127     p->nIn = strlen(zData);
128   }
129   return 0;
130 }
131 
132 /* The input buffer has overflowed.  Refill the input buffer, then
133 ** return the next character
134 */
135 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
136   size_t got;
137 
138   assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
139   assert( p->in!=0 );        /* Only called if reading froma file */
140 
141   got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
142   if( got==0 ) return EOF;
143   p->nIn = got;
144   p->iIn = 1;
145   return p->zIn[0];
146 }
147 
148 /* Return the next character of input.  Return EOF at end of input. */
149 static int csv_getc(CsvReader *p){
150   if( p->iIn >= p->nIn ){
151     if( p->in!=0 ) return csv_getc_refill(p);
152     return EOF;
153   }
154   return p->zIn[p->iIn++];
155 }
156 
157 /* Increase the size of p->z and append character c to the end.
158 ** Return 0 on success and non-zero if there is an OOM error */
159 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
160   char *zNew;
161   int nNew = p->nAlloc*2 + 100;
162   zNew = sqlite3_realloc64(p->z, nNew);
163   if( zNew ){
164     p->z = zNew;
165     p->nAlloc = nNew;
166     p->z[p->n++] = c;
167     return 0;
168   }else{
169     csv_errmsg(p, "out of memory");
170     return 1;
171   }
172 }
173 
174 /* Append a single character to the CsvReader.z[] array.
175 ** Return 0 on success and non-zero if there is an OOM error */
176 static int csv_append(CsvReader *p, char c){
177   if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
178   p->z[p->n++] = c;
179   return 0;
180 }
181 
182 /* Read a single field of CSV text.  Compatible with rfc4180 and extended
183 ** with the option of having a separator other than ",".
184 **
185 **   +  Input comes from p->in.
186 **   +  Store results in p->z of length p->n.  Space to hold p->z comes
187 **      from sqlite3_malloc64().
188 **   +  Keep track of the line number in p->nLine.
189 **   +  Store the character that terminates the field in p->cTerm.  Store
190 **      EOF on end-of-file.
191 **
192 ** Return "" at EOF.  Return 0 on an OOM error.
193 */
194 static char *csv_read_one_field(CsvReader *p){
195   int c;
196   p->n = 0;
197   c = csv_getc(p);
198   if( c==EOF ){
199     p->cTerm = EOF;
200     return "";
201   }
202   if( c=='"' ){
203     int pc, ppc;
204     int startLine = p->nLine;
205     int cQuote = c;
206     pc = ppc = 0;
207     while( 1 ){
208       c = csv_getc(p);
209       if( c=='\n' ) p->nLine++;
210       if( c==cQuote ){
211         if( pc==cQuote ){
212           pc = 0;
213           continue;
214         }
215       }
216       if( (c==',' && pc==cQuote)
217        || (c=='\n' && pc==cQuote)
218        || (c=='\n' && pc=='\r' && ppc==cQuote)
219        || (c==EOF && pc==cQuote)
220       ){
221         do{ p->n--; }while( p->z[p->n]!=cQuote );
222         p->cTerm = c;
223         break;
224       }
225       if( pc==cQuote && c!='\r' ){
226         csv_errmsg(p, "line %d: unescaped %c character", p->nLine, cQuote);
227         break;
228       }
229       if( c==EOF ){
230         csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
231                    startLine, cQuote);
232         p->cTerm = c;
233         break;
234       }
235       if( csv_append(p, (char)c) ) return 0;
236       ppc = pc;
237       pc = c;
238     }
239   }else{
240     while( c!=EOF && c!=',' && c!='\n' ){
241       if( csv_append(p, (char)c) ) return 0;
242       c = csv_getc(p);
243     }
244     if( c=='\n' ){
245       p->nLine++;
246       if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
247     }
248     p->cTerm = c;
249   }
250   if( p->z ) p->z[p->n] = 0;
251   return p->z;
252 }
253 
254 
255 /* Forward references to the various virtual table methods implemented
256 ** in this file. */
257 static int csvtabCreate(sqlite3*, void*, int, const char*const*,
258                            sqlite3_vtab**,char**);
259 static int csvtabConnect(sqlite3*, void*, int, const char*const*,
260                            sqlite3_vtab**,char**);
261 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
262 static int csvtabDisconnect(sqlite3_vtab*);
263 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
264 static int csvtabClose(sqlite3_vtab_cursor*);
265 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
266                           int argc, sqlite3_value **argv);
267 static int csvtabNext(sqlite3_vtab_cursor*);
268 static int csvtabEof(sqlite3_vtab_cursor*);
269 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
270 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
271 
272 /* An instance of the CSV virtual table */
273 typedef struct CsvTable {
274   sqlite3_vtab base;              /* Base class.  Must be first */
275   char *zFilename;                /* Name of the CSV file */
276   char *zData;                    /* Raw CSV data in lieu of zFilename */
277   long iStart;                    /* Offset to start of data in zFilename */
278   int nCol;                       /* Number of columns in the CSV file */
279   unsigned int tstFlags;          /* Bit values used for testing */
280 } CsvTable;
281 
282 /* Allowed values for tstFlags */
283 #define CSVTEST_FIDX  0x0001      /* Pretend that constrained searchs cost less*/
284 
285 /* A cursor for the CSV virtual table */
286 typedef struct CsvCursor {
287   sqlite3_vtab_cursor base;       /* Base class.  Must be first */
288   CsvReader rdr;                  /* The CsvReader object */
289   char **azVal;                   /* Value of the current row */
290   sqlite3_int64 iRowid;           /* The current rowid.  Negative for EOF */
291 } CsvCursor;
292 
293 /* Transfer error message text from a reader into a CsvTable */
294 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
295   sqlite3_free(pTab->base.zErrMsg);
296   pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
297 }
298 
299 /*
300 ** This method is the destructor fo a CsvTable object.
301 */
302 static int csvtabDisconnect(sqlite3_vtab *pVtab){
303   CsvTable *p = (CsvTable*)pVtab;
304   sqlite3_free(p->zFilename);
305   sqlite3_free(p->zData);
306   sqlite3_free(p);
307   return SQLITE_OK;
308 }
309 
/* Advance past any leading whitespace (as classified by isspace()) in
** z and return a pointer to the first character that is not whitespace.
** If the string is all whitespace, the result points at its terminator. */
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zCur = (const unsigned char*)z;
  while( isspace(*zCur) ){
    zCur++;
  }
  return (const char*)zCur;
}
316 
/* Remove trailing whitespace from the end of string z[], in place.
**
** n is the length of the text kept so far, so the last kept character
** is z[n-1].  (Testing z[n] would look at the nul terminator, which is
** never whitespace, and nothing would ever be trimmed.)
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
323 
/* Remove quoting from string z[], in place.
**
** The string is modified only if it begins with ' or " and ends with
** that same character.  In that case the outer quotes are removed and
** each doubled quote character inside is collapsed to a single one.
** Any other string is left unchanged.
*/
static void csv_dequote(char *z){
  char q = z[0];
  size_t nByte;          /* Length of the input, including both quotes */
  size_t iRd;            /* Read position */
  size_t iWr;            /* Write position */

  if( q!='\'' && q!='"' ) return;
  nByte = strlen(z);
  if( nByte<2 || z[nByte-1]!=q ) return;
  iWr = 0;
  iRd = 1;
  while( iRd<nByte-1 ){
    if( z[iRd]==q && z[iRd+1]==q ){
      iRd++;
    }
    z[iWr++] = z[iRd++];
  }
  z[iWr] = 0;
}
339 
/* Test whether string z has the form "TAG = VALUE", where TAG is the
** first nTag bytes of zTag and whitespace is permitted before TAG and
** on either side of the "=".
**
** On a match, return a pointer into z at the first non-whitespace
** character following the "=".  Otherwise return NULL.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zCur = csv_skip_whitespace(z);
  if( strncmp(zTag, zCur, nTag)==0 ){
    zCur = csv_skip_whitespace(&zCur[nTag]);
    if( zCur[0]=='=' ){
      return csv_skip_whitespace(&zCur[1]);
    }
  }
  return 0;
}
351 
352 /* Decode a parameter that requires a dequoted string.
353 **
354 ** Return 1 if the parameter is seen, or 0 if not.  1 is returned
355 ** even if there is an error.  If an error occurs, then an error message
356 ** is left in p->zErr.  If there are no errors, p->zErr[0]==0.
357 */
358 static int csv_string_parameter(
359   CsvReader *p,            /* Leave the error message here, if there is one */
360   const char *zParam,      /* Parameter we are checking for */
361   const char *zArg,        /* Raw text of the virtual table argment */
362   char **pzVal             /* Write the dequoted string value here */
363 ){
364   const char *zValue;
365   zValue = csv_parameter(zParam,strlen(zParam),zArg);
366   if( zValue==0 ) return 0;
367   p->zErr[0] = 0;
368   if( *pzVal ){
369     csv_errmsg(p, "more than one '%s' parameter", zParam);
370     return 1;
371   }
372   *pzVal = sqlite3_mprintf("%s", zValue);
373   if( *pzVal==0 ){
374     csv_errmsg(p, "out of memory");
375     return 1;
376   }
377   csv_trim_whitespace(*pzVal);
378   csv_dequote(*pzVal);
379   return 1;
380 }
381 
382 
/* Interpret string z as a boolean.
**
** Return 1 for "yes", "on", "true" (compared with sqlite3_stricmp())
** or "1".  Return 0 for "no", "off", "false" or "0".  Return -1 if the
** string is none of these.
*/
static int csv_boolean(const char *z){
  if( sqlite3_stricmp("yes",z)==0
   || sqlite3_stricmp("on",z)==0
   || sqlite3_stricmp("true",z)==0
   || (z[0]=='1' && z[1]==0)   /* exactly the one-character string "1" */
  ){
    return 1;
  }
  if( sqlite3_stricmp("no",z)==0
   || sqlite3_stricmp("off",z)==0
   || sqlite3_stricmp("false",z)==0
   || (z[0]=='0' && z[1]==0)   /* exactly the one-character string "0" */
  ){
    return 0;
  }
  return -1;
}
403 
404 
405 /*
406 ** Parameters:
407 **    filename=FILENAME          Name of file containing CSV content
408 **    data=TEXT                  Direct CSV content.
409 **    schema=SCHEMA              Alternative CSV schema.
410 **    header=YES|NO              First row of CSV defines the names of
411 **                               columns if "yes".  Default "no".
412 **    columns=N                  Assume the CSV file contains N columns.
413 **    testflags=N                Bitmask of test flags.  Optional
414 **
415 ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
416 ** and so forth.  If columns=N is omitted, then the file is opened and
417 ** the number of columns in the first row is counted to determine the
418 ** column count.  If header=YES, then the first row is skipped.
419 */
420 static int csvtabConnect(
421   sqlite3 *db,
422   void *pAux,
423   int argc, const char *const*argv,
424   sqlite3_vtab **ppVtab,
425   char **pzErr
426 ){
427   CsvTable *pNew = 0;        /* The CsvTable object to construct */
428   int bHeader = -1;          /* header= flags.  -1 means not seen yet */
429   int rc = SQLITE_OK;        /* Result code from this routine */
430   int i, j;                  /* Loop counters */
431   int tstFlags = 0;          /* Value for testflags=N parameter */
432   int nCol = -99;            /* Value of the columns= parameter */
433   CsvReader sRdr;            /* A CSV file reader used to store an error
434                              ** message and/or to count the number of columns */
435   static const char *azParam[] = {
436      "filename", "data", "schema",
437   };
438   char *azPValue[3];         /* Parameter values */
439 # define CSV_FILENAME (azPValue[0])
440 # define CSV_DATA     (azPValue[1])
441 # define CSV_SCHEMA   (azPValue[2])
442 
443 
444   assert( sizeof(azPValue)==sizeof(azParam) );
445   memset(&sRdr, 0, sizeof(sRdr));
446   memset(azPValue, 0, sizeof(azPValue));
447   for(i=3; i<argc; i++){
448     const char *z = argv[i];
449     const char *zValue;
450     for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
451       if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
452     }
453     if( j<sizeof(azParam)/sizeof(azParam[0]) ){
454       if( sRdr.zErr[0] ) goto csvtab_connect_error;
455     }else
456     if( (zValue = csv_parameter("header",6,z))!=0 ){
457       int x;
458       if( bHeader>=0 ){
459         csv_errmsg(&sRdr, "more than one 'header' parameter");
460         goto csvtab_connect_error;
461       }
462       x = csv_boolean(zValue);
463       if( x==1 ){
464         bHeader = 1;
465       }else if( x==0 ){
466         bHeader = 0;
467       }else{
468         csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
469         goto csvtab_connect_error;
470       }
471     }else
472     if( (zValue = csv_parameter("testflags",9,z))!=0 ){
473       tstFlags = (unsigned int)atoi(zValue);
474     }else
475     if( (zValue = csv_parameter("columns",7,z))!=0 ){
476       if( nCol>0 ){
477         csv_errmsg(&sRdr, "more than one 'columns' parameter");
478         goto csvtab_connect_error;
479       }
480       nCol = atoi(zValue);
481       if( nCol<=0 ){
482         csv_errmsg(&sRdr, "must have at least one column");
483         goto csvtab_connect_error;
484       }
485     }else
486     {
487       csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
488       goto csvtab_connect_error;
489     }
490   }
491   if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
492     csv_errmsg(&sRdr, "must either filename= or data= but not both");
493     goto csvtab_connect_error;
494   }
495   if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
496     goto csvtab_connect_error;
497   }
498   pNew = sqlite3_malloc( sizeof(*pNew) );
499   *ppVtab = (sqlite3_vtab*)pNew;
500   if( pNew==0 ) goto csvtab_connect_oom;
501   memset(pNew, 0, sizeof(*pNew));
502   if( nCol>0 ){
503     pNew->nCol = nCol;
504   }else{
505     do{
506       const char *z = csv_read_one_field(&sRdr);
507       if( z==0 ) goto csvtab_connect_oom;
508       pNew->nCol++;
509     }while( sRdr.cTerm==',' );
510   }
511   pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
512   pNew->zData = CSV_DATA;          CSV_DATA = 0;
513   pNew->tstFlags = tstFlags;
514   pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
515   csv_reader_reset(&sRdr);
516   if( CSV_SCHEMA==0 ){
517     char *zSep = "";
518     CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
519     if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
520     for(i=0; i<pNew->nCol; i++){
521       CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
522       zSep = ",";
523     }
524     CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
525   }
526   rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
527   if( rc ) goto csvtab_connect_error;
528   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
529     sqlite3_free(azPValue[i]);
530   }
531   return SQLITE_OK;
532 
533 csvtab_connect_oom:
534   rc = SQLITE_NOMEM;
535   csv_errmsg(&sRdr, "out of memory");
536 
537 csvtab_connect_error:
538   if( pNew ) csvtabDisconnect(&pNew->base);
539   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
540     sqlite3_free(azPValue[i]);
541   }
542   if( sRdr.zErr[0] ){
543     sqlite3_free(*pzErr);
544     *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
545   }
546   csv_reader_reset(&sRdr);
547   if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
548   return rc;
549 }
550 
551 /*
552 ** Reset the current row content held by a CsvCursor.
553 */
554 static void csvtabCursorRowReset(CsvCursor *pCur){
555   CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
556   int i;
557   for(i=0; i<pTab->nCol; i++){
558     sqlite3_free(pCur->azVal[i]);
559     pCur->azVal[i] = 0;
560   }
561 }
562 
563 /*
564 ** The xConnect and xCreate methods do the same thing, but they must be
565 ** different so that the virtual table is not an eponymous virtual table.
566 */
567 static int csvtabCreate(
568   sqlite3 *db,
569   void *pAux,
570   int argc, const char *const*argv,
571   sqlite3_vtab **ppVtab,
572   char **pzErr
573 ){
574  return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
575 }
576 
577 /*
578 ** Destructor for a CsvCursor.
579 */
580 static int csvtabClose(sqlite3_vtab_cursor *cur){
581   CsvCursor *pCur = (CsvCursor*)cur;
582   csvtabCursorRowReset(pCur);
583   csv_reader_reset(&pCur->rdr);
584   sqlite3_free(cur);
585   return SQLITE_OK;
586 }
587 
588 /*
589 ** Constructor for a new CsvTable cursor object.
590 */
591 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
592   CsvTable *pTab = (CsvTable*)p;
593   CsvCursor *pCur;
594   pCur = sqlite3_malloc( sizeof(*pCur) * sizeof(char*)*pTab->nCol );
595   if( pCur==0 ) return SQLITE_NOMEM;
596   memset(pCur, 0, sizeof(*pCur) + sizeof(char*)*pTab->nCol );
597   pCur->azVal = (char**)&pCur[1];
598   *ppCursor = &pCur->base;
599   if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
600     csv_xfer_error(pTab, &pCur->rdr);
601     return SQLITE_ERROR;
602   }
603   return SQLITE_OK;
604 }
605 
606 
607 /*
608 ** Advance a CsvCursor to its next row of input.
609 ** Set the EOF marker if we reach the end of input.
610 */
611 static int csvtabNext(sqlite3_vtab_cursor *cur){
612   CsvCursor *pCur = (CsvCursor*)cur;
613   CsvTable *pTab = (CsvTable*)cur->pVtab;
614   int i = 0;
615   char *z;
616   csvtabCursorRowReset(pCur);
617   do{
618     z = csv_read_one_field(&pCur->rdr);
619     if( z==0 ){
620       csv_xfer_error(pTab, &pCur->rdr);
621       break;
622     }
623     z = sqlite3_mprintf("%s", z);
624     if( z==0 ){
625       csv_errmsg(&pCur->rdr, "out of memory");
626       csv_xfer_error(pTab, &pCur->rdr);
627       break;
628     }
629     if( i<pTab->nCol ){
630       pCur->azVal[i++] = z;
631     }
632   }while( z!=0 && pCur->rdr.cTerm==',' );
633   if( z==0 || pCur->rdr.cTerm==EOF ){
634     pCur->iRowid = -1;
635   }else{
636     pCur->iRowid++;
637   }
638   return SQLITE_OK;
639 }
640 
641 /*
642 ** Return values of columns for the row at which the CsvCursor
643 ** is currently pointing.
644 */
645 static int csvtabColumn(
646   sqlite3_vtab_cursor *cur,   /* The cursor */
647   sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
648   int i                       /* Which column to return */
649 ){
650   CsvCursor *pCur = (CsvCursor*)cur;
651   CsvTable *pTab = (CsvTable*)cur->pVtab;
652   if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
653     sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_STATIC);
654   }
655   return SQLITE_OK;
656 }
657 
658 /*
659 ** Return the rowid for the current row.
660 */
661 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
662   CsvCursor *pCur = (CsvCursor*)cur;
663   *pRowid = pCur->iRowid;
664   return SQLITE_OK;
665 }
666 
667 /*
668 ** Return TRUE if the cursor has been moved off of the last
669 ** row of output.
670 */
671 static int csvtabEof(sqlite3_vtab_cursor *cur){
672   CsvCursor *pCur = (CsvCursor*)cur;
673   return pCur->iRowid<0;
674 }
675 
676 /*
677 ** Only a full table scan is supported.  So xFilter simply rewinds to
678 ** the beginning.
679 */
680 static int csvtabFilter(
681   sqlite3_vtab_cursor *pVtabCursor,
682   int idxNum, const char *idxStr,
683   int argc, sqlite3_value **argv
684 ){
685   CsvCursor *pCur = (CsvCursor*)pVtabCursor;
686   CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
687   pCur->iRowid = 0;
688   if( pCur->rdr.in==0 ){
689     assert( pCur->rdr.zIn==pTab->zData );
690     assert( pTab->iStart<=pCur->rdr.nIn );
691     pCur->rdr.iIn = pTab->iStart;
692   }else{
693     fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
694     pCur->rdr.iIn = 0;
695     pCur->rdr.nIn = 0;
696   }
697   return csvtabNext(pVtabCursor);
698 }
699 
700 /*
701 ** Only a forward full table scan is supported.  xBestIndex is mostly
702 ** a no-op.  If CSVTEST_FIDX is set, then the presence of equality
703 ** constraints lowers the estimated cost, which is fiction, but is useful
704 ** for testing certain kinds of virtual table behavior.
705 */
706 static int csvtabBestIndex(
707   sqlite3_vtab *tab,
708   sqlite3_index_info *pIdxInfo
709 ){
710   CsvTable *pTab = (CsvTable*)tab;
711   int i;
712   int nConst = 0;
713   pIdxInfo->estimatedCost = 1000000;
714   if( (pTab->tstFlags & CSVTEST_FIDX)==0 ){
715     return SQLITE_OK;
716   }
717   /* The usual (and sensible) case is to take the "return SQLITE_OK" above.
718   ** The code below only runs when testflags=1.  The code below
719   ** generates an artifical and unrealistic plan which is useful
720   ** for testing virtual table logic but is not helpfulto real applications.
721   **
722   ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
723   ** table (even though it is not) and the cost of running the virtual table
724   ** is reduced from 1 million to just 10.  The constraints are *not* marked
725   ** as omittable, however, so the query planner should still generate a
726   ** plan that gives a correct answer, even if they plan is not optimal.
727   */
728   for(i=0; i<pIdxInfo->nConstraint; i++){
729     unsigned char op;
730     if( pIdxInfo->aConstraint[i].usable==0 ) continue;
731     op = pIdxInfo->aConstraint[i].op;
732     if( op==SQLITE_INDEX_CONSTRAINT_EQ
733      || op==SQLITE_INDEX_CONSTRAINT_LIKE
734      || op==SQLITE_INDEX_CONSTRAINT_GLOB
735     ){
736       pIdxInfo->estimatedCost = 10;
737       pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
738       nConst++;
739     }
740   }
741   return SQLITE_OK;
742 }
743 
744 
745 static sqlite3_module CsvModule = {
746   0,                       /* iVersion */
747   csvtabCreate,            /* xCreate */
748   csvtabConnect,           /* xConnect */
749   csvtabBestIndex,         /* xBestIndex */
750   csvtabDisconnect,        /* xDisconnect */
751   csvtabDisconnect,        /* xDestroy */
752   csvtabOpen,              /* xOpen - open a cursor */
753   csvtabClose,             /* xClose - close a cursor */
754   csvtabFilter,            /* xFilter - configure scan constraints */
755   csvtabNext,              /* xNext - advance a cursor */
756   csvtabEof,               /* xEof - check for end of scan */
757   csvtabColumn,            /* xColumn - read data */
758   csvtabRowid,             /* xRowid - read data */
759   0,                       /* xUpdate */
760   0,                       /* xBegin */
761   0,                       /* xSync */
762   0,                       /* xCommit */
763   0,                       /* xRollback */
764   0,                       /* xFindMethod */
765   0,                       /* xRename */
766 };
767 
768 #ifdef _WIN32
769 __declspec(dllexport)
770 #endif
771 /*
772 ** This routine is called when the extension is loaded.  The new
773 ** CSV virtual table module is registered with the calling database
774 ** connection.
775 */
776 int sqlite3_csv_init(
777   sqlite3 *db,
778   char **pzErrMsg,
779   const sqlite3_api_routines *pApi
780 ){
781   SQLITE_EXTENSION_INIT2(pApi);
782   return sqlite3_create_module(db, "csv", &CsvModule, 0);
783 }
784