xref: /sqlite-3.40.0/ext/misc/csv.c (revision e7ebe0aa)
1724b1896Sdrh /*
2724b1896Sdrh ** 2016-05-28
3724b1896Sdrh **
4724b1896Sdrh ** The author disclaims copyright to this source code.  In place of
5724b1896Sdrh ** a legal notice, here is a blessing:
6724b1896Sdrh **
7724b1896Sdrh **    May you do good and not evil.
8724b1896Sdrh **    May you find forgiveness for yourself and forgive others.
9724b1896Sdrh **    May you share freely, never taking more than you give.
10724b1896Sdrh **
11724b1896Sdrh ******************************************************************************
12724b1896Sdrh **
13724b1896Sdrh ** This file contains the implementation of an SQLite virtual table for
14724b1896Sdrh ** reading CSV files.
15724b1896Sdrh **
16724b1896Sdrh ** Usage:
17724b1896Sdrh **
18724b1896Sdrh **    .load ./csv
19724b1896Sdrh **    CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20724b1896Sdrh **    SELECT * FROM csv;
21724b1896Sdrh **
226f147c54Sdrh ** The columns are named "c0", "c1", "c2", ... by default.  Or the
236f147c54Sdrh ** application can define its own CREATE TABLE statement using the
246f147c54Sdrh ** schema= parameter, like this:
25724b1896Sdrh **
26724b1896Sdrh **    CREATE VIRTUAL TABLE temp.csv2 USING csv(
27724b1896Sdrh **       filename = "../http.log",
28724b1896Sdrh **       schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29724b1896Sdrh **    );
30ac9c3d2cSdrh **
31ac9c3d2cSdrh ** Instead of specifying a file, the text of the CSV can be loaded using
32ac9c3d2cSdrh ** the data= parameter.
33ac9c3d2cSdrh **
34ac9c3d2cSdrh ** If the columns=N parameter is supplied, then the CSV file is assumed to have
356f147c54Sdrh ** N columns.  If both the columns= and schema= parameters are omitted, then
366f147c54Sdrh ** the number and names of the columns is determined by the first line of
376f147c54Sdrh ** the CSV input.
38ac9c3d2cSdrh **
39ac9c3d2cSdrh ** Some extra debugging features (used for testing virtual tables) are available
40ac9c3d2cSdrh ** if this module is compiled with -DSQLITE_TEST.
41724b1896Sdrh */
42724b1896Sdrh #include <sqlite3ext.h>
43724b1896Sdrh SQLITE_EXTENSION_INIT1
44724b1896Sdrh #include <string.h>
45724b1896Sdrh #include <stdlib.h>
46724b1896Sdrh #include <assert.h>
47724b1896Sdrh #include <stdarg.h>
48724b1896Sdrh #include <ctype.h>
49724b1896Sdrh #include <stdio.h>
50724b1896Sdrh 
51eb5a549eSdrh #ifndef SQLITE_OMIT_VIRTUALTABLE
52eb5a549eSdrh 
53724b1896Sdrh /*
54724b1896Sdrh ** A macro to hint to the compiler that a function should not be
55724b1896Sdrh ** inlined.
** It expands to the GCC-style attribute when __GNUC__ is defined, to the
** MSVC declspec when _MSC_VER>=1310, and to nothing for other compilers.
56724b1896Sdrh */
57724b1896Sdrh #if defined(__GNUC__)
58724b1896Sdrh #  define CSV_NOINLINE  __attribute__((noinline))
59724b1896Sdrh #elif defined(_MSC_VER) && _MSC_VER>=1310
60724b1896Sdrh #  define CSV_NOINLINE  __declspec(noinline)
61724b1896Sdrh #else
62724b1896Sdrh #  define CSV_NOINLINE
63724b1896Sdrh #endif
64724b1896Sdrh 
65724b1896Sdrh 
66724b1896Sdrh /* Max size of the error message in a CsvReader */
/* This is the size of CsvReader.zErr[]; csv_errmsg() passes it to
** sqlite3_vsnprintf() as the output limit. */
67724b1896Sdrh #define CSV_MXERR 200
68724b1896Sdrh 
69adcba64dSdrh /* Size of the CsvReader input buffer */
/* csv_reader_open() allocates this many bytes for file input and
** csv_getc_refill() reads at most this many bytes per fread() call. */
70adcba64dSdrh #define CSV_INBUFSZ 1024
71adcba64dSdrh 
72724b1896Sdrh /* A context object used when reading CSV text, either from a file or
** from an in-memory string (see csv_reader_open()). */
73724b1896Sdrh typedef struct CsvReader CsvReader;
74724b1896Sdrh struct CsvReader {
75724b1896Sdrh   FILE *in;              /* Input stream.  NULL when reading in-memory text */
76724b1896Sdrh   char *z;               /* Accumulated text for a field.  Grown on demand */
77724b1896Sdrh   int n;                 /* Number of bytes in z */
78724b1896Sdrh   int nAlloc;            /* Space allocated for z[] */
79724b1896Sdrh   int nLine;             /* Current line number.  Bumped at each '\n' read */
80d5fbde80Sdrh   int bNotFirst;         /* True if prior text has been seen (BOM check) */
812acd24d9Sdrh   int cTerm;             /* Character that terminated the most recent field */
82adcba64dSdrh   size_t iIn;            /* Index in zIn[] of the next unread character */
83adcba64dSdrh   size_t nIn;            /* Number of characters in the input buffer */
84adcba64dSdrh   char *zIn;             /* Input buffer: malloc-ed for a file, else the
                         ** caller's data string (not owned by the reader) */
85724b1896Sdrh   char zErr[CSV_MXERR];  /* Error message.  zErr[0]==0 means no error */
86724b1896Sdrh };
87724b1896Sdrh 
88724b1896Sdrh /* Initialize a CsvReader object */
csv_reader_init(CsvReader * p)89724b1896Sdrh static void csv_reader_init(CsvReader *p){
90adcba64dSdrh   p->in = 0;
91adcba64dSdrh   p->z = 0;
92adcba64dSdrh   p->n = 0;
93adcba64dSdrh   p->nAlloc = 0;
94adcba64dSdrh   p->nLine = 0;
95d5fbde80Sdrh   p->bNotFirst = 0;
96adcba64dSdrh   p->nIn = 0;
97adcba64dSdrh   p->zIn = 0;
98adcba64dSdrh   p->zErr[0] = 0;
99724b1896Sdrh }
100724b1896Sdrh 
101724b1896Sdrh /* Close and reset a CsvReader object */
csv_reader_reset(CsvReader * p)102724b1896Sdrh static void csv_reader_reset(CsvReader *p){
103adcba64dSdrh   if( p->in ){
104adcba64dSdrh     fclose(p->in);
105adcba64dSdrh     sqlite3_free(p->zIn);
106adcba64dSdrh   }
107724b1896Sdrh   sqlite3_free(p->z);
108724b1896Sdrh   csv_reader_init(p);
109724b1896Sdrh }
110724b1896Sdrh 
111724b1896Sdrh /* Report an error on a CsvReader */
csv_errmsg(CsvReader * p,const char * zFormat,...)112724b1896Sdrh static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
113724b1896Sdrh   va_list ap;
114724b1896Sdrh   va_start(ap, zFormat);
115724b1896Sdrh   sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
116724b1896Sdrh   va_end(ap);
117724b1896Sdrh }
118724b1896Sdrh 
119724b1896Sdrh /* Open the file associated with a CsvReader
120724b1896Sdrh ** Return the number of errors.
121724b1896Sdrh */
csv_reader_open(CsvReader * p,const char * zFilename,const char * zData)122adcba64dSdrh static int csv_reader_open(
123adcba64dSdrh   CsvReader *p,               /* The reader to open */
124adcba64dSdrh   const char *zFilename,      /* Read from this filename */
125adcba64dSdrh   const char *zData           /*  ... or use this data */
126adcba64dSdrh ){
127adcba64dSdrh   if( zFilename ){
128adcba64dSdrh     p->zIn = sqlite3_malloc( CSV_INBUFSZ );
129adcba64dSdrh     if( p->zIn==0 ){
130adcba64dSdrh       csv_errmsg(p, "out of memory");
131adcba64dSdrh       return 1;
132adcba64dSdrh     }
133724b1896Sdrh     p->in = fopen(zFilename, "rb");
134724b1896Sdrh     if( p->in==0 ){
1354d3e6140Sdrh       sqlite3_free(p->zIn);
136adcba64dSdrh       csv_reader_reset(p);
137724b1896Sdrh       csv_errmsg(p, "cannot open '%s' for reading", zFilename);
138724b1896Sdrh       return 1;
139724b1896Sdrh     }
140adcba64dSdrh   }else{
141adcba64dSdrh     assert( p->in==0 );
142adcba64dSdrh     p->zIn = (char*)zData;
143adcba64dSdrh     p->nIn = strlen(zData);
144adcba64dSdrh   }
145724b1896Sdrh   return 0;
146724b1896Sdrh }
147724b1896Sdrh 
148adcba64dSdrh /* The input buffer has overflowed.  Refill the input buffer, then
149adcba64dSdrh ** return the next character
150adcba64dSdrh */
csv_getc_refill(CsvReader * p)151adcba64dSdrh static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
152adcba64dSdrh   size_t got;
153adcba64dSdrh 
154adcba64dSdrh   assert( p->iIn>=p->nIn );  /* Only called on an empty input buffer */
155adcba64dSdrh   assert( p->in!=0 );        /* Only called if reading froma file */
156adcba64dSdrh 
157adcba64dSdrh   got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
158adcba64dSdrh   if( got==0 ) return EOF;
159adcba64dSdrh   p->nIn = got;
160adcba64dSdrh   p->iIn = 1;
161adcba64dSdrh   return p->zIn[0];
162adcba64dSdrh }
163adcba64dSdrh 
164adcba64dSdrh /* Return the next character of input.  Return EOF at end of input. */
csv_getc(CsvReader * p)165adcba64dSdrh static int csv_getc(CsvReader *p){
166adcba64dSdrh   if( p->iIn >= p->nIn ){
167adcba64dSdrh     if( p->in!=0 ) return csv_getc_refill(p);
168adcba64dSdrh     return EOF;
169adcba64dSdrh   }
1702acd24d9Sdrh   return ((unsigned char*)p->zIn)[p->iIn++];
171adcba64dSdrh }
172adcba64dSdrh 
173724b1896Sdrh /* Increase the size of p->z and append character c to the end.
174724b1896Sdrh ** Return 0 on success and non-zero if there is an OOM error */
csv_resize_and_append(CsvReader * p,char c)175724b1896Sdrh static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
176724b1896Sdrh   char *zNew;
177724b1896Sdrh   int nNew = p->nAlloc*2 + 100;
178724b1896Sdrh   zNew = sqlite3_realloc64(p->z, nNew);
179724b1896Sdrh   if( zNew ){
180724b1896Sdrh     p->z = zNew;
181724b1896Sdrh     p->nAlloc = nNew;
182724b1896Sdrh     p->z[p->n++] = c;
183724b1896Sdrh     return 0;
184724b1896Sdrh   }else{
185724b1896Sdrh     csv_errmsg(p, "out of memory");
186724b1896Sdrh     return 1;
187724b1896Sdrh   }
188724b1896Sdrh }
189724b1896Sdrh 
190724b1896Sdrh /* Append a single character to the CsvReader.z[] array.
191724b1896Sdrh ** Return 0 on success and non-zero if there is an OOM error */
csv_append(CsvReader * p,char c)192724b1896Sdrh static int csv_append(CsvReader *p, char c){
193724b1896Sdrh   if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
194724b1896Sdrh   p->z[p->n++] = c;
195724b1896Sdrh   return 0;
196724b1896Sdrh }
197724b1896Sdrh 
198724b1896Sdrh /* Read a single field of CSV text.  Compatible with rfc4180 and extended
199724b1896Sdrh ** with the option of having a separator other than ",".
200724b1896Sdrh **
201724b1896Sdrh **   +  Input comes from p->in.
202724b1896Sdrh **   +  Store results in p->z of length p->n.  Space to hold p->z comes
203724b1896Sdrh **      from sqlite3_malloc64().
204724b1896Sdrh **   +  Keep track of the line number in p->nLine.
205724b1896Sdrh **   +  Store the character that terminates the field in p->cTerm.  Store
206724b1896Sdrh **      EOF on end-of-file.
207724b1896Sdrh **
208e893e2e4Sdrh ** Return 0 at EOF or on OOM.  On EOF, the p->cTerm character will have
209e893e2e4Sdrh ** been set to EOF.
210724b1896Sdrh */
csv_read_one_field(CsvReader * p)211724b1896Sdrh static char *csv_read_one_field(CsvReader *p){
212724b1896Sdrh   int c;
213724b1896Sdrh   p->n = 0;
214adcba64dSdrh   c = csv_getc(p);
215724b1896Sdrh   if( c==EOF ){
216724b1896Sdrh     p->cTerm = EOF;
217e893e2e4Sdrh     return 0;
218724b1896Sdrh   }
219724b1896Sdrh   if( c=='"' ){
220724b1896Sdrh     int pc, ppc;
221724b1896Sdrh     int startLine = p->nLine;
222724b1896Sdrh     pc = ppc = 0;
223724b1896Sdrh     while( 1 ){
224adcba64dSdrh       c = csv_getc(p);
225ac9c3d2cSdrh       if( c<='"' || pc=='"' ){
226724b1896Sdrh         if( c=='\n' ) p->nLine++;
227ac9c3d2cSdrh         if( c=='"' ){
228ac9c3d2cSdrh           if( pc=='"' ){
229724b1896Sdrh             pc = 0;
230724b1896Sdrh             continue;
231724b1896Sdrh           }
232724b1896Sdrh         }
233ac9c3d2cSdrh         if( (c==',' && pc=='"')
234ac9c3d2cSdrh          || (c=='\n' && pc=='"')
235ac9c3d2cSdrh          || (c=='\n' && pc=='\r' && ppc=='"')
236ac9c3d2cSdrh          || (c==EOF && pc=='"')
237724b1896Sdrh         ){
238ac9c3d2cSdrh           do{ p->n--; }while( p->z[p->n]!='"' );
23980f2b33aSmistachkin           p->cTerm = (char)c;
240724b1896Sdrh           break;
241724b1896Sdrh         }
242ac9c3d2cSdrh         if( pc=='"' && c!='\r' ){
243ac9c3d2cSdrh           csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
244724b1896Sdrh           break;
245724b1896Sdrh         }
246724b1896Sdrh         if( c==EOF ){
247724b1896Sdrh           csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
248ac9c3d2cSdrh                      startLine, '"');
24980f2b33aSmistachkin           p->cTerm = (char)c;
250724b1896Sdrh           break;
251724b1896Sdrh         }
252ac9c3d2cSdrh       }
253724b1896Sdrh       if( csv_append(p, (char)c) ) return 0;
254724b1896Sdrh       ppc = pc;
255724b1896Sdrh       pc = c;
256724b1896Sdrh     }
257724b1896Sdrh   }else{
258d5fbde80Sdrh     /* If this is the first field being parsed and it begins with the
259d5fbde80Sdrh     ** UTF-8 BOM  (0xEF BB BF) then skip the BOM */
260d5fbde80Sdrh     if( (c&0xff)==0xef && p->bNotFirst==0 ){
2612fb960b5Sdrh       csv_append(p, (char)c);
262d5fbde80Sdrh       c = csv_getc(p);
263d5fbde80Sdrh       if( (c&0xff)==0xbb ){
2642fb960b5Sdrh         csv_append(p, (char)c);
265d5fbde80Sdrh         c = csv_getc(p);
266d5fbde80Sdrh         if( (c&0xff)==0xbf ){
267d5fbde80Sdrh           p->bNotFirst = 1;
268d5fbde80Sdrh           p->n = 0;
269d5fbde80Sdrh           return csv_read_one_field(p);
270d5fbde80Sdrh         }
271d5fbde80Sdrh       }
272d5fbde80Sdrh     }
273ac9c3d2cSdrh     while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
274724b1896Sdrh       if( csv_append(p, (char)c) ) return 0;
275adcba64dSdrh       c = csv_getc(p);
276724b1896Sdrh     }
277724b1896Sdrh     if( c=='\n' ){
278724b1896Sdrh       p->nLine++;
279724b1896Sdrh       if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
280724b1896Sdrh     }
28180f2b33aSmistachkin     p->cTerm = (char)c;
282724b1896Sdrh   }
283c578e4f0Sdan   assert( p->z==0 || p->n<p->nAlloc );
284724b1896Sdrh   if( p->z ) p->z[p->n] = 0;
285d5fbde80Sdrh   p->bNotFirst = 1;
286724b1896Sdrh   return p->z;
287724b1896Sdrh }
288724b1896Sdrh 
289724b1896Sdrh 
290724b1896Sdrh /* Forward references to the various virtual table methods implemented
291724b1896Sdrh ** in this file. */
292724b1896Sdrh static int csvtabCreate(sqlite3*, void*, int, const char*const*,
293724b1896Sdrh                            sqlite3_vtab**,char**);
294724b1896Sdrh static int csvtabConnect(sqlite3*, void*, int, const char*const*,
295724b1896Sdrh                            sqlite3_vtab**,char**);
296724b1896Sdrh static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
297724b1896Sdrh static int csvtabDisconnect(sqlite3_vtab*);
298724b1896Sdrh static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
299724b1896Sdrh static int csvtabClose(sqlite3_vtab_cursor*);
300724b1896Sdrh static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
301724b1896Sdrh                           int argc, sqlite3_value **argv);
302724b1896Sdrh static int csvtabNext(sqlite3_vtab_cursor*);
303724b1896Sdrh static int csvtabEof(sqlite3_vtab_cursor*);
304724b1896Sdrh static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
305724b1896Sdrh static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
306724b1896Sdrh 
307724b1896Sdrh /* An instance of the CSV virtual table */
/* Exactly one of zFilename and zData is set by csvtabConnect(); both
** strings are freed by csvtabDisconnect(). */
308724b1896Sdrh typedef struct CsvTable {
309724b1896Sdrh   sqlite3_vtab base;              /* Base class.  Must be first */
310724b1896Sdrh   char *zFilename;                /* Name of the CSV file */
311adcba64dSdrh   char *zData;                    /* Raw CSV data in lieu of zFilename */
312724b1896Sdrh   long iStart;                    /* Offset to start of data in zFilename,
                                  ** or in zData.  Non-zero only when a
                                  ** header row was consumed at connect */
313724b1896Sdrh   int nCol;                       /* Number of columns in the CSV file */
314abfd272bSdrh   unsigned int tstFlags;          /* Bit values used for testing */
315724b1896Sdrh } CsvTable;
316724b1896Sdrh 
317abfd272bSdrh /* Allowed values for tstFlags */
318abfd272bSdrh #define CSVTEST_FIDX  0x0001      /* Pretend constrained searches cost less */
319abfd272bSdrh 
320724b1896Sdrh /* A cursor for the CSV virtual table */
/* azVal[] and aLen[] each have one entry per table column; the entries
** of azVal[] are released with sqlite3_free() by csvtabCursorRowReset(). */
321724b1896Sdrh typedef struct CsvCursor {
322724b1896Sdrh   sqlite3_vtab_cursor base;       /* Base class.  Must be first */
323724b1896Sdrh   CsvReader rdr;                  /* The CsvReader object */
324724b1896Sdrh   char **azVal;                   /* Value of the current row */
325ac9c3d2cSdrh   int *aLen;                      /* Length of each entry */
326724b1896Sdrh   sqlite3_int64 iRowid;           /* The current rowid.  Negative for EOF */
327724b1896Sdrh } CsvCursor;
328724b1896Sdrh 
329724b1896Sdrh /* Transfer error message text from a reader into a CsvTable */
csv_xfer_error(CsvTable * pTab,CsvReader * pRdr)330724b1896Sdrh static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
331724b1896Sdrh   sqlite3_free(pTab->base.zErrMsg);
332724b1896Sdrh   pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
333724b1896Sdrh }
334724b1896Sdrh 
335724b1896Sdrh /*
336724b1896Sdrh ** This method is the destructor fo a CsvTable object.
337724b1896Sdrh */
csvtabDisconnect(sqlite3_vtab * pVtab)338724b1896Sdrh static int csvtabDisconnect(sqlite3_vtab *pVtab){
339724b1896Sdrh   CsvTable *p = (CsvTable*)pVtab;
340724b1896Sdrh   sqlite3_free(p->zFilename);
34135db31b2Sdrh   sqlite3_free(p->zData);
342724b1896Sdrh   sqlite3_free(p);
343724b1896Sdrh   return SQLITE_OK;
344724b1896Sdrh }
345724b1896Sdrh 
/* Advance past leading whitespace, as classified by isspace().  The
** return value points at the first non-whitespace character of z, or at
** its zero terminator when z holds nothing but whitespace. */
static const char *csv_skip_whitespace(const char *z){
  const unsigned char *zCur = (const unsigned char*)z;
  for(; isspace(*zCur); zCur++){}
  return (const char*)zCur;
}
352724b1896Sdrh 
/* Remove trailing whitespace from the end of string z[], in place.
**
** n is the length of the text still kept, so the character examined on
** each step is z[n-1], the last kept character.  (Examining z[n] would
** test the zero terminator, which is never whitespace, and nothing would
** ever be trimmed.)  A string consisting only of whitespace becomes "".
*/
static void csv_trim_whitespace(char *z){
  size_t n = strlen(z);
  while( n>0 && isspace((unsigned char)z[n-1]) ) n--;
  z[n] = 0;
}
359724b1896Sdrh 
/* Dequote the string z[] in place.
**
** Nothing happens unless z begins with ' or " and ends with that same
** character.  Otherwise the enclosing quotes are removed and each
** doubled quote character inside is collapsed to a single one.
*/
static void csv_dequote(char *z){
  char q = z[0];           /* The quote character in use */
  size_t nLen;             /* Length of the quoted input */
  size_t iRd;              /* Read index into z[] */
  int iWr = 0;             /* Write index into z[] */

  if( q!='\'' && q!='"' ) return;
  nLen = strlen(z);
  if( nLen<2 || z[nLen-1]!=q ) return;
  iRd = 1;
  while( iRd<nLen-1 ){
    /* Skip the first quote of a doubled pair */
    if( z[iRd]==q && z[iRd+1]==q ) iRd++;
    z[iWr++] = z[iRd++];
  }
  z[iWr] = 0;
}
375724b1896Sdrh 
/* Test whether z has the form "TAG = VALUE", where whitespace may appear
** before TAG and on either side of the "=".  zTag is the tag text and
** nTag its length in bytes.  On a match, return a pointer to the first
** character of VALUE.  Return NULL if z does not have that form.
*/
static const char *csv_parameter(const char *zTag, int nTag, const char *z){
  const char *zRest = csv_skip_whitespace(z);
  if( strncmp(zTag, zRest, nTag)==0 ){
    zRest = csv_skip_whitespace(zRest+nTag);
    if( zRest[0]=='=' ){
      return csv_skip_whitespace(zRest+1);
    }
  }
  return 0;
}
387724b1896Sdrh 
388adcba64dSdrh /* Decode a parameter that requires a dequoted string.
389adcba64dSdrh **
390adcba64dSdrh ** Return 1 if the parameter is seen, or 0 if not.  1 is returned
391adcba64dSdrh ** even if there is an error.  If an error occurs, then an error message
392adcba64dSdrh ** is left in p->zErr.  If there are no errors, p->zErr[0]==0.
393adcba64dSdrh */
csv_string_parameter(CsvReader * p,const char * zParam,const char * zArg,char ** pzVal)394adcba64dSdrh static int csv_string_parameter(
395adcba64dSdrh   CsvReader *p,            /* Leave the error message here, if there is one */
396adcba64dSdrh   const char *zParam,      /* Parameter we are checking for */
397adcba64dSdrh   const char *zArg,        /* Raw text of the virtual table argment */
398adcba64dSdrh   char **pzVal             /* Write the dequoted string value here */
399adcba64dSdrh ){
400adcba64dSdrh   const char *zValue;
40111499f0aSdrh   zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
402adcba64dSdrh   if( zValue==0 ) return 0;
403adcba64dSdrh   p->zErr[0] = 0;
404adcba64dSdrh   if( *pzVal ){
405adcba64dSdrh     csv_errmsg(p, "more than one '%s' parameter", zParam);
406adcba64dSdrh     return 1;
407adcba64dSdrh   }
408adcba64dSdrh   *pzVal = sqlite3_mprintf("%s", zValue);
409adcba64dSdrh   if( *pzVal==0 ){
410adcba64dSdrh     csv_errmsg(p, "out of memory");
411adcba64dSdrh     return 1;
412adcba64dSdrh   }
413adcba64dSdrh   csv_trim_whitespace(*pzVal);
414adcba64dSdrh   csv_dequote(*pzVal);
415adcba64dSdrh   return 1;
416adcba64dSdrh }
417adcba64dSdrh 
418adcba64dSdrh 
/* Interpret z as a boolean.  Return 1 for "yes", "on", "true" (compared
** with sqlite3_stricmp()) or exactly "1".  Return 0 for "no", "off",
** "false" or exactly "0".  Return -1 if z is none of these.
*/
static int csv_boolean(const char *z){
  static const char *const azTrue[] = { "yes", "on", "true" };
  static const char *const azFalse[] = { "no", "off", "false" };
  int i;

  for(i=0; i<3; i++){
    if( sqlite3_stricmp(azTrue[i], z)==0 ) return 1;
  }
  if( z[0]=='1' && z[1]==0 ) return 1;

  for(i=0; i<3; i++){
    if( sqlite3_stricmp(azFalse[i], z)==0 ) return 0;
  }
  if( z[0]=='0' && z[1]==0 ) return 0;

  return -1;
}
439724b1896Sdrh 
/* Test whether z has the form "TAG = BOOLEAN" or is just "TAG".
**
** A bare TAG counts as true.  When the input matches, store the boolean
** in *pValue and return 1.  Return 0, leaving *pValue alone, if z does
** not begin with TAG, if TAG is followed by something other than "=", or
** if the text after "=" is not recognized by csv_boolean().
*/
static int csv_boolean_parameter(
  const char *zTag,       /* Tag we are looking for */
  int nTag,               /* Size of the tag in bytes */
  const char *z,          /* Input parameter */
  int *pValue             /* Write boolean value here */
){
  const char *zRest = csv_skip_whitespace(z);
  int bVal;

  if( strncmp(zTag, zRest, nTag)!=0 ) return 0;
  zRest = csv_skip_whitespace(zRest + nTag);
  switch( zRest[0] ){
    case 0:                 /* Bare "TAG" */
      *pValue = 1;
      return 1;
    case '=':               /* "TAG = ..." is decoded below */
      break;
    default:
      return 0;
  }
  bVal = csv_boolean(csv_skip_whitespace(zRest+1));
  if( bVal<0 ) return 0;
  *pValue = bVal;
  return 1;
}
468724b1896Sdrh 
469724b1896Sdrh /*
470724b1896Sdrh ** Parameters:
471adcba64dSdrh **    filename=FILENAME          Name of file containing CSV content
472adcba64dSdrh **    data=TEXT                  Direct CSV content.
4731fc1a0f2Sdrh **    schema=SCHEMA              Alternative CSV schema.
474724b1896Sdrh **    header=YES|NO              First row of CSV defines the names of
475724b1896Sdrh **                               columns if "yes".  Default "no".
476adcba64dSdrh **    columns=N                  Assume the CSV file contains N columns.
477ac9c3d2cSdrh **
478ac9c3d2cSdrh ** Only available if compiled with SQLITE_TEST:
479ac9c3d2cSdrh **
480abfd272bSdrh **    testflags=N                Bitmask of test flags.  Optional
481724b1896Sdrh **
4821fc1a0f2Sdrh ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
4831fc1a0f2Sdrh ** and so forth.  If columns=N is omitted, then the file is opened and
4841fc1a0f2Sdrh ** the number of columns in the first row is counted to determine the
4851fc1a0f2Sdrh ** column count.  If header=YES, then the first row is skipped.
486724b1896Sdrh */
csvtabConnect(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)487724b1896Sdrh static int csvtabConnect(
488724b1896Sdrh   sqlite3 *db,
489724b1896Sdrh   void *pAux,
490724b1896Sdrh   int argc, const char *const*argv,
491724b1896Sdrh   sqlite3_vtab **ppVtab,
492724b1896Sdrh   char **pzErr
493724b1896Sdrh ){
4941fc1a0f2Sdrh   CsvTable *pNew = 0;        /* The CsvTable object to construct */
4951fc1a0f2Sdrh   int bHeader = -1;          /* header= flags.  -1 means not seen yet */
4961fc1a0f2Sdrh   int rc = SQLITE_OK;        /* Result code from this routine */
497adcba64dSdrh   int i, j;                  /* Loop counters */
498ac9c3d2cSdrh #ifdef SQLITE_TEST
499adcba64dSdrh   int tstFlags = 0;          /* Value for testflags=N parameter */
500ac9c3d2cSdrh #endif
5016f147c54Sdrh   int b;                     /* Value of a boolean parameter */
5021fc1a0f2Sdrh   int nCol = -99;            /* Value of the columns= parameter */
5031fc1a0f2Sdrh   CsvReader sRdr;            /* A CSV file reader used to store an error
5041fc1a0f2Sdrh                              ** message and/or to count the number of columns */
505adcba64dSdrh   static const char *azParam[] = {
506adcba64dSdrh      "filename", "data", "schema",
507adcba64dSdrh   };
508adcba64dSdrh   char *azPValue[3];         /* Parameter values */
509adcba64dSdrh # define CSV_FILENAME (azPValue[0])
510adcba64dSdrh # define CSV_DATA     (azPValue[1])
511adcba64dSdrh # define CSV_SCHEMA   (azPValue[2])
512724b1896Sdrh 
513adcba64dSdrh 
514adcba64dSdrh   assert( sizeof(azPValue)==sizeof(azParam) );
515724b1896Sdrh   memset(&sRdr, 0, sizeof(sRdr));
516adcba64dSdrh   memset(azPValue, 0, sizeof(azPValue));
517724b1896Sdrh   for(i=3; i<argc; i++){
518724b1896Sdrh     const char *z = argv[i];
519724b1896Sdrh     const char *zValue;
520adcba64dSdrh     for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
521adcba64dSdrh       if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
522724b1896Sdrh     }
523adcba64dSdrh     if( j<sizeof(azParam)/sizeof(azParam[0]) ){
524adcba64dSdrh       if( sRdr.zErr[0] ) goto csvtab_connect_error;
525724b1896Sdrh     }else
5266f147c54Sdrh     if( csv_boolean_parameter("header",6,z,&b) ){
527724b1896Sdrh       if( bHeader>=0 ){
528724b1896Sdrh         csv_errmsg(&sRdr, "more than one 'header' parameter");
529724b1896Sdrh         goto csvtab_connect_error;
530724b1896Sdrh       }
5316f147c54Sdrh       bHeader = b;
532724b1896Sdrh     }else
533ac9c3d2cSdrh #ifdef SQLITE_TEST
534abfd272bSdrh     if( (zValue = csv_parameter("testflags",9,z))!=0 ){
535abfd272bSdrh       tstFlags = (unsigned int)atoi(zValue);
536abfd272bSdrh     }else
537ac9c3d2cSdrh #endif
5381fc1a0f2Sdrh     if( (zValue = csv_parameter("columns",7,z))!=0 ){
5391fc1a0f2Sdrh       if( nCol>0 ){
5401fc1a0f2Sdrh         csv_errmsg(&sRdr, "more than one 'columns' parameter");
5411fc1a0f2Sdrh         goto csvtab_connect_error;
5421fc1a0f2Sdrh       }
5431fc1a0f2Sdrh       nCol = atoi(zValue);
5441fc1a0f2Sdrh       if( nCol<=0 ){
5456f147c54Sdrh         csv_errmsg(&sRdr, "column= value must be positive");
5461fc1a0f2Sdrh         goto csvtab_connect_error;
5471fc1a0f2Sdrh       }
5481fc1a0f2Sdrh     }else
549724b1896Sdrh     {
5506f147c54Sdrh       csv_errmsg(&sRdr, "bad parameter: '%s'", z);
551724b1896Sdrh       goto csvtab_connect_error;
552724b1896Sdrh     }
553724b1896Sdrh   }
554adcba64dSdrh   if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
5556f147c54Sdrh     csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
556724b1896Sdrh     goto csvtab_connect_error;
557724b1896Sdrh   }
5586f147c54Sdrh 
5596f147c54Sdrh   if( (nCol<=0 || bHeader==1)
5606f147c54Sdrh    && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
5616f147c54Sdrh   ){
562724b1896Sdrh     goto csvtab_connect_error;
563724b1896Sdrh   }
564724b1896Sdrh   pNew = sqlite3_malloc( sizeof(*pNew) );
565724b1896Sdrh   *ppVtab = (sqlite3_vtab*)pNew;
566724b1896Sdrh   if( pNew==0 ) goto csvtab_connect_oom;
567724b1896Sdrh   memset(pNew, 0, sizeof(*pNew));
5686f147c54Sdrh   if( CSV_SCHEMA==0 ){
5696f147c54Sdrh     sqlite3_str *pStr = sqlite3_str_new(0);
5706f147c54Sdrh     char *zSep = "";
5716f147c54Sdrh     int iCol = 0;
5726f147c54Sdrh     sqlite3_str_appendf(pStr, "CREATE TABLE x(");
5736f147c54Sdrh     if( nCol<0 && bHeader<1 ){
5746f147c54Sdrh       nCol = 0;
5756f147c54Sdrh       do{
5766f147c54Sdrh         csv_read_one_field(&sRdr);
5776f147c54Sdrh         nCol++;
5786f147c54Sdrh       }while( sRdr.cTerm==',' );
5796f147c54Sdrh     }
5806f147c54Sdrh     if( nCol>0 && bHeader<1 ){
5816f147c54Sdrh       for(iCol=0; iCol<nCol; iCol++){
5826f147c54Sdrh         sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
5836f147c54Sdrh         zSep = ",";
5846f147c54Sdrh       }
5851fc1a0f2Sdrh     }else{
586724b1896Sdrh       do{
5876f147c54Sdrh         char *z = csv_read_one_field(&sRdr);
5886f147c54Sdrh         if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
5896f147c54Sdrh           sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
5906f147c54Sdrh           zSep = ",";
5916f147c54Sdrh           iCol++;
5926f147c54Sdrh         }
5936f147c54Sdrh       }while( sRdr.cTerm==',' );
5946f147c54Sdrh       if( nCol<0 ){
5956f147c54Sdrh         nCol = iCol;
5966f147c54Sdrh       }else{
5976f147c54Sdrh         while( iCol<nCol ){
5986f147c54Sdrh           sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
5996f147c54Sdrh           zSep = ",";
6006f147c54Sdrh         }
6016f147c54Sdrh       }
6026f147c54Sdrh     }
6036f147c54Sdrh     pNew->nCol = nCol;
6046f147c54Sdrh     sqlite3_str_appendf(pStr, ")");
6056f147c54Sdrh     CSV_SCHEMA = sqlite3_str_finish(pStr);
6066f147c54Sdrh     if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
6076f147c54Sdrh   }else if( nCol<0 ){
6086f147c54Sdrh     do{
609e893e2e4Sdrh       csv_read_one_field(&sRdr);
610724b1896Sdrh       pNew->nCol++;
611724b1896Sdrh     }while( sRdr.cTerm==',' );
6126f147c54Sdrh   }else{
6136f147c54Sdrh     pNew->nCol = nCol;
6141fc1a0f2Sdrh   }
615adcba64dSdrh   pNew->zFilename = CSV_FILENAME;  CSV_FILENAME = 0;
616adcba64dSdrh   pNew->zData = CSV_DATA;          CSV_DATA = 0;
617ac9c3d2cSdrh #ifdef SQLITE_TEST
618abfd272bSdrh   pNew->tstFlags = tstFlags;
619ac9c3d2cSdrh #endif
6206f147c54Sdrh   if( bHeader!=1 ){
6216f147c54Sdrh     pNew->iStart = 0;
6226f147c54Sdrh   }else if( pNew->zData ){
6236f147c54Sdrh     pNew->iStart = (int)sRdr.iIn;
6246f147c54Sdrh   }else{
62520c00823Sdrh     pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
6266f147c54Sdrh   }
627724b1896Sdrh   csv_reader_reset(&sRdr);
628adcba64dSdrh   rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
6296f147c54Sdrh   if( rc ){
6306f147c54Sdrh     csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
6316f147c54Sdrh     goto csvtab_connect_error;
6326f147c54Sdrh   }
633adcba64dSdrh   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
634adcba64dSdrh     sqlite3_free(azPValue[i]);
635adcba64dSdrh   }
6362b1c2aadSdrh   /* Rationale for DIRECTONLY:
6372b1c2aadSdrh   ** An attacker who controls a database schema could use this vtab
6382b1c2aadSdrh   ** to exfiltrate sensitive data from other files in the filesystem.
6392b1c2aadSdrh   ** And, recommended practice is to put all CSV virtual tables in the
6402b1c2aadSdrh   ** TEMP namespace, so they should still be usable from within TEMP
6412b1c2aadSdrh   ** views, so there shouldn't be a serious loss of functionality by
6422b1c2aadSdrh   ** prohibiting the use of this vtab from persistent triggers and views.
6432b1c2aadSdrh   */
6442b1c2aadSdrh   sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
645724b1896Sdrh   return SQLITE_OK;
646724b1896Sdrh 
647724b1896Sdrh csvtab_connect_oom:
648724b1896Sdrh   rc = SQLITE_NOMEM;
649724b1896Sdrh   csv_errmsg(&sRdr, "out of memory");
650724b1896Sdrh 
651724b1896Sdrh csvtab_connect_error:
652724b1896Sdrh   if( pNew ) csvtabDisconnect(&pNew->base);
653adcba64dSdrh   for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
654adcba64dSdrh     sqlite3_free(azPValue[i]);
655adcba64dSdrh   }
656724b1896Sdrh   if( sRdr.zErr[0] ){
657724b1896Sdrh     sqlite3_free(*pzErr);
658724b1896Sdrh     *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
659724b1896Sdrh   }
660724b1896Sdrh   csv_reader_reset(&sRdr);
661abfd272bSdrh   if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
662724b1896Sdrh   return rc;
663724b1896Sdrh }
664724b1896Sdrh 
665724b1896Sdrh /*
666724b1896Sdrh ** Reset the current row content held by a CsvCursor.
667724b1896Sdrh */
csvtabCursorRowReset(CsvCursor * pCur)668724b1896Sdrh static void csvtabCursorRowReset(CsvCursor *pCur){
669724b1896Sdrh   CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
670724b1896Sdrh   int i;
671724b1896Sdrh   for(i=0; i<pTab->nCol; i++){
672724b1896Sdrh     sqlite3_free(pCur->azVal[i]);
673724b1896Sdrh     pCur->azVal[i] = 0;
674ac9c3d2cSdrh     pCur->aLen[i] = 0;
675724b1896Sdrh   }
676724b1896Sdrh }
677724b1896Sdrh 
678724b1896Sdrh /*
679724b1896Sdrh ** The xConnect and xCreate methods do the same thing, but they must be
680724b1896Sdrh ** different so that the virtual table is not an eponymous virtual table.
681724b1896Sdrh */
csvtabCreate(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVtab,char ** pzErr)682724b1896Sdrh static int csvtabCreate(
683724b1896Sdrh   sqlite3 *db,
684724b1896Sdrh   void *pAux,
685724b1896Sdrh   int argc, const char *const*argv,
686724b1896Sdrh   sqlite3_vtab **ppVtab,
687724b1896Sdrh   char **pzErr
688724b1896Sdrh ){
689724b1896Sdrh  return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
690724b1896Sdrh }
691724b1896Sdrh 
692724b1896Sdrh /*
693724b1896Sdrh ** Destructor for a CsvCursor.
694724b1896Sdrh */
csvtabClose(sqlite3_vtab_cursor * cur)695724b1896Sdrh static int csvtabClose(sqlite3_vtab_cursor *cur){
696724b1896Sdrh   CsvCursor *pCur = (CsvCursor*)cur;
697724b1896Sdrh   csvtabCursorRowReset(pCur);
698724b1896Sdrh   csv_reader_reset(&pCur->rdr);
699724b1896Sdrh   sqlite3_free(cur);
700724b1896Sdrh   return SQLITE_OK;
701724b1896Sdrh }
702724b1896Sdrh 
703724b1896Sdrh /*
704724b1896Sdrh ** Constructor for a new CsvTable cursor object.
705724b1896Sdrh */
csvtabOpen(sqlite3_vtab * p,sqlite3_vtab_cursor ** ppCursor)706724b1896Sdrh static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
707724b1896Sdrh   CsvTable *pTab = (CsvTable*)p;
708724b1896Sdrh   CsvCursor *pCur;
709ac9c3d2cSdrh   size_t nByte;
710ac9c3d2cSdrh   nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
71111499f0aSdrh   pCur = sqlite3_malloc64( nByte );
712724b1896Sdrh   if( pCur==0 ) return SQLITE_NOMEM;
713ac9c3d2cSdrh   memset(pCur, 0, nByte);
714724b1896Sdrh   pCur->azVal = (char**)&pCur[1];
715ac9c3d2cSdrh   pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
716724b1896Sdrh   *ppCursor = &pCur->base;
717adcba64dSdrh   if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
718724b1896Sdrh     csv_xfer_error(pTab, &pCur->rdr);
719724b1896Sdrh     return SQLITE_ERROR;
720724b1896Sdrh   }
721724b1896Sdrh   return SQLITE_OK;
722724b1896Sdrh }
723724b1896Sdrh 
724724b1896Sdrh 
725724b1896Sdrh /*
726724b1896Sdrh ** Advance a CsvCursor to its next row of input.
727724b1896Sdrh ** Set the EOF marker if we reach the end of input.
728724b1896Sdrh */
csvtabNext(sqlite3_vtab_cursor * cur)729724b1896Sdrh static int csvtabNext(sqlite3_vtab_cursor *cur){
730724b1896Sdrh   CsvCursor *pCur = (CsvCursor*)cur;
731724b1896Sdrh   CsvTable *pTab = (CsvTable*)cur->pVtab;
732724b1896Sdrh   int i = 0;
733724b1896Sdrh   char *z;
734724b1896Sdrh   do{
735724b1896Sdrh     z = csv_read_one_field(&pCur->rdr);
736724b1896Sdrh     if( z==0 ){
737724b1896Sdrh       break;
738724b1896Sdrh     }
739ac9c3d2cSdrh     if( i<pTab->nCol ){
740ac9c3d2cSdrh       if( pCur->aLen[i] < pCur->rdr.n+1 ){
74111499f0aSdrh         char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
742ac9c3d2cSdrh         if( zNew==0 ){
743724b1896Sdrh           csv_errmsg(&pCur->rdr, "out of memory");
744724b1896Sdrh           csv_xfer_error(pTab, &pCur->rdr);
745724b1896Sdrh           break;
746724b1896Sdrh         }
747ac9c3d2cSdrh         pCur->azVal[i] = zNew;
748ac9c3d2cSdrh         pCur->aLen[i] = pCur->rdr.n+1;
749724b1896Sdrh       }
750ac9c3d2cSdrh       memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
751ac9c3d2cSdrh       i++;
752ac9c3d2cSdrh     }
753ac9c3d2cSdrh   }while( pCur->rdr.cTerm==',' );
754*e7ebe0aaSdan   if( z==0 && i==0 ){
7554f573529Sdrh     pCur->iRowid = -1;
7564f573529Sdrh   }else{
7574f573529Sdrh     pCur->iRowid++;
758ac9c3d2cSdrh     while( i<pTab->nCol ){
759ac9c3d2cSdrh       sqlite3_free(pCur->azVal[i]);
760ac9c3d2cSdrh       pCur->azVal[i] = 0;
761ac9c3d2cSdrh       pCur->aLen[i] = 0;
762ac9c3d2cSdrh       i++;
763ac9c3d2cSdrh     }
764724b1896Sdrh   }
765724b1896Sdrh   return SQLITE_OK;
766724b1896Sdrh }
767724b1896Sdrh 
768724b1896Sdrh /*
769724b1896Sdrh ** Return values of columns for the row at which the CsvCursor
770724b1896Sdrh ** is currently pointing.
771724b1896Sdrh */
csvtabColumn(sqlite3_vtab_cursor * cur,sqlite3_context * ctx,int i)772724b1896Sdrh static int csvtabColumn(
773724b1896Sdrh   sqlite3_vtab_cursor *cur,   /* The cursor */
774724b1896Sdrh   sqlite3_context *ctx,       /* First argument to sqlite3_result_...() */
775724b1896Sdrh   int i                       /* Which column to return */
776724b1896Sdrh ){
777724b1896Sdrh   CsvCursor *pCur = (CsvCursor*)cur;
778724b1896Sdrh   CsvTable *pTab = (CsvTable*)cur->pVtab;
779724b1896Sdrh   if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
7800897c3c0Sdrh     sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
781724b1896Sdrh   }
782724b1896Sdrh   return SQLITE_OK;
783724b1896Sdrh }
784724b1896Sdrh 
785724b1896Sdrh /*
786724b1896Sdrh ** Return the rowid for the current row.
787724b1896Sdrh */
csvtabRowid(sqlite3_vtab_cursor * cur,sqlite_int64 * pRowid)788724b1896Sdrh static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
789724b1896Sdrh   CsvCursor *pCur = (CsvCursor*)cur;
790724b1896Sdrh   *pRowid = pCur->iRowid;
791724b1896Sdrh   return SQLITE_OK;
792724b1896Sdrh }
793724b1896Sdrh 
794724b1896Sdrh /*
795724b1896Sdrh ** Return TRUE if the cursor has been moved off of the last
796724b1896Sdrh ** row of output.
797724b1896Sdrh */
csvtabEof(sqlite3_vtab_cursor * cur)798724b1896Sdrh static int csvtabEof(sqlite3_vtab_cursor *cur){
799724b1896Sdrh   CsvCursor *pCur = (CsvCursor*)cur;
800724b1896Sdrh   return pCur->iRowid<0;
801724b1896Sdrh }
802724b1896Sdrh 
803724b1896Sdrh /*
804724b1896Sdrh ** Only a full table scan is supported.  So xFilter simply rewinds to
805724b1896Sdrh ** the beginning.
806724b1896Sdrh */
csvtabFilter(sqlite3_vtab_cursor * pVtabCursor,int idxNum,const char * idxStr,int argc,sqlite3_value ** argv)807724b1896Sdrh static int csvtabFilter(
808724b1896Sdrh   sqlite3_vtab_cursor *pVtabCursor,
809724b1896Sdrh   int idxNum, const char *idxStr,
810724b1896Sdrh   int argc, sqlite3_value **argv
811724b1896Sdrh ){
812724b1896Sdrh   CsvCursor *pCur = (CsvCursor*)pVtabCursor;
813724b1896Sdrh   CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
814724b1896Sdrh   pCur->iRowid = 0;
815c578e4f0Sdan 
816c578e4f0Sdan   /* Ensure the field buffer is always allocated. Otherwise, if the
817c578e4f0Sdan   ** first field is zero bytes in size, this may be mistaken for an OOM
818c578e4f0Sdan   ** error in csvtabNext(). */
819c578e4f0Sdan   if( csv_append(&pCur->rdr, 0) ) return SQLITE_NOMEM;
820c578e4f0Sdan 
821adcba64dSdrh   if( pCur->rdr.in==0 ){
822adcba64dSdrh     assert( pCur->rdr.zIn==pTab->zData );
82380f2b33aSmistachkin     assert( pTab->iStart>=0 );
82480f2b33aSmistachkin     assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
825adcba64dSdrh     pCur->rdr.iIn = pTab->iStart;
826adcba64dSdrh   }else{
827724b1896Sdrh     fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
828adcba64dSdrh     pCur->rdr.iIn = 0;
829adcba64dSdrh     pCur->rdr.nIn = 0;
830adcba64dSdrh   }
831724b1896Sdrh   return csvtabNext(pVtabCursor);
832724b1896Sdrh }
833724b1896Sdrh 
834724b1896Sdrh /*
835adcba64dSdrh ** Only a forward full table scan is supported.  xBestIndex is mostly
836abfd272bSdrh ** a no-op.  If CSVTEST_FIDX is set, then the presence of equality
837abfd272bSdrh ** constraints lowers the estimated cost, which is fiction, but is useful
838abfd272bSdrh ** for testing certain kinds of virtual table behavior.
839724b1896Sdrh */
csvtabBestIndex(sqlite3_vtab * tab,sqlite3_index_info * pIdxInfo)840724b1896Sdrh static int csvtabBestIndex(
841724b1896Sdrh   sqlite3_vtab *tab,
842724b1896Sdrh   sqlite3_index_info *pIdxInfo
843724b1896Sdrh ){
844abfd272bSdrh   pIdxInfo->estimatedCost = 1000000;
845ac9c3d2cSdrh #ifdef SQLITE_TEST
846ac9c3d2cSdrh   if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
847ac9c3d2cSdrh     /* The usual (and sensible) case is to always do a full table scan.
848ac9c3d2cSdrh     ** The code in this branch only runs when testflags=1.  This code
8491fc1a0f2Sdrh     ** generates an artifical and unrealistic plan which is useful
850adcba64dSdrh     ** for testing virtual table logic but is not helpful to real applications.
851adcba64dSdrh     **
852adcba64dSdrh     ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
853adcba64dSdrh     ** table (even though it is not) and the cost of running the virtual table
854adcba64dSdrh     ** is reduced from 1 million to just 10.  The constraints are *not* marked
855adcba64dSdrh     ** as omittable, however, so the query planner should still generate a
856adcba64dSdrh     ** plan that gives a correct answer, even if they plan is not optimal.
857adcba64dSdrh     */
858ac9c3d2cSdrh     int i;
859ac9c3d2cSdrh     int nConst = 0;
860abfd272bSdrh     for(i=0; i<pIdxInfo->nConstraint; i++){
8611fc1a0f2Sdrh       unsigned char op;
862abfd272bSdrh       if( pIdxInfo->aConstraint[i].usable==0 ) continue;
8631fc1a0f2Sdrh       op = pIdxInfo->aConstraint[i].op;
8641fc1a0f2Sdrh       if( op==SQLITE_INDEX_CONSTRAINT_EQ
8651fc1a0f2Sdrh        || op==SQLITE_INDEX_CONSTRAINT_LIKE
8661fc1a0f2Sdrh        || op==SQLITE_INDEX_CONSTRAINT_GLOB
8671fc1a0f2Sdrh       ){
868abfd272bSdrh         pIdxInfo->estimatedCost = 10;
8691fc1a0f2Sdrh         pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
8701fc1a0f2Sdrh         nConst++;
871abfd272bSdrh       }
872abfd272bSdrh     }
873ac9c3d2cSdrh   }
874ac9c3d2cSdrh #endif
875724b1896Sdrh   return SQLITE_OK;
876724b1896Sdrh }
877724b1896Sdrh 
878724b1896Sdrh 
879724b1896Sdrh static sqlite3_module CsvModule = {
880724b1896Sdrh   0,                       /* iVersion */
881724b1896Sdrh   csvtabCreate,            /* xCreate */
882724b1896Sdrh   csvtabConnect,           /* xConnect */
883724b1896Sdrh   csvtabBestIndex,         /* xBestIndex */
884724b1896Sdrh   csvtabDisconnect,        /* xDisconnect */
885724b1896Sdrh   csvtabDisconnect,        /* xDestroy */
886724b1896Sdrh   csvtabOpen,              /* xOpen - open a cursor */
887724b1896Sdrh   csvtabClose,             /* xClose - close a cursor */
888724b1896Sdrh   csvtabFilter,            /* xFilter - configure scan constraints */
889724b1896Sdrh   csvtabNext,              /* xNext - advance a cursor */
890724b1896Sdrh   csvtabEof,               /* xEof - check for end of scan */
891724b1896Sdrh   csvtabColumn,            /* xColumn - read data */
892724b1896Sdrh   csvtabRowid,             /* xRowid - read data */
893724b1896Sdrh   0,                       /* xUpdate */
894724b1896Sdrh   0,                       /* xBegin */
895724b1896Sdrh   0,                       /* xSync */
896724b1896Sdrh   0,                       /* xCommit */
897724b1896Sdrh   0,                       /* xRollback */
898724b1896Sdrh   0,                       /* xFindMethod */
899724b1896Sdrh   0,                       /* xRename */
900724b1896Sdrh };
901724b1896Sdrh 
902ac9c3d2cSdrh #ifdef SQLITE_TEST
903ac9c3d2cSdrh /*
904ac9c3d2cSdrh ** For virtual table testing, make a version of the CSV virtual table
905ac9c3d2cSdrh ** available that has an xUpdate function.  But the xUpdate always returns
906ac9c3d2cSdrh ** SQLITE_READONLY since the CSV file is not really writable.
907ac9c3d2cSdrh */
csvtabUpdate(sqlite3_vtab * p,int n,sqlite3_value ** v,sqlite3_int64 * x)908ac9c3d2cSdrh static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
909ac9c3d2cSdrh   return SQLITE_READONLY;
910ac9c3d2cSdrh }
911ac9c3d2cSdrh static sqlite3_module CsvModuleFauxWrite = {
912ac9c3d2cSdrh   0,                       /* iVersion */
913ac9c3d2cSdrh   csvtabCreate,            /* xCreate */
914ac9c3d2cSdrh   csvtabConnect,           /* xConnect */
915ac9c3d2cSdrh   csvtabBestIndex,         /* xBestIndex */
916ac9c3d2cSdrh   csvtabDisconnect,        /* xDisconnect */
917ac9c3d2cSdrh   csvtabDisconnect,        /* xDestroy */
918ac9c3d2cSdrh   csvtabOpen,              /* xOpen - open a cursor */
919ac9c3d2cSdrh   csvtabClose,             /* xClose - close a cursor */
920ac9c3d2cSdrh   csvtabFilter,            /* xFilter - configure scan constraints */
921ac9c3d2cSdrh   csvtabNext,              /* xNext - advance a cursor */
922ac9c3d2cSdrh   csvtabEof,               /* xEof - check for end of scan */
923ac9c3d2cSdrh   csvtabColumn,            /* xColumn - read data */
924ac9c3d2cSdrh   csvtabRowid,             /* xRowid - read data */
925ac9c3d2cSdrh   csvtabUpdate,            /* xUpdate */
926ac9c3d2cSdrh   0,                       /* xBegin */
927ac9c3d2cSdrh   0,                       /* xSync */
928ac9c3d2cSdrh   0,                       /* xCommit */
929ac9c3d2cSdrh   0,                       /* xRollback */
930ac9c3d2cSdrh   0,                       /* xFindMethod */
931ac9c3d2cSdrh   0,                       /* xRename */
932ac9c3d2cSdrh };
933ac9c3d2cSdrh #endif /* SQLITE_TEST */
934ac9c3d2cSdrh 
935eb5a549eSdrh #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
936ac9c3d2cSdrh 
937ac9c3d2cSdrh 
938724b1896Sdrh #ifdef _WIN32
939724b1896Sdrh __declspec(dllexport)
940724b1896Sdrh #endif
941724b1896Sdrh /*
942724b1896Sdrh ** This routine is called when the extension is loaded.  The new
943724b1896Sdrh ** CSV virtual table module is registered with the calling database
944724b1896Sdrh ** connection.
945724b1896Sdrh */
sqlite3_csv_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)946724b1896Sdrh int sqlite3_csv_init(
947724b1896Sdrh   sqlite3 *db,
948724b1896Sdrh   char **pzErrMsg,
949724b1896Sdrh   const sqlite3_api_routines *pApi
950724b1896Sdrh ){
951eb5a549eSdrh #ifndef SQLITE_OMIT_VIRTUALTABLE
952ac9c3d2cSdrh   int rc;
953724b1896Sdrh   SQLITE_EXTENSION_INIT2(pApi);
954ac9c3d2cSdrh   rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
955ac9c3d2cSdrh #ifdef SQLITE_TEST
956ac9c3d2cSdrh   if( rc==SQLITE_OK ){
957ac9c3d2cSdrh     rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
958ac9c3d2cSdrh   }
959ac9c3d2cSdrh #endif
960ac9c3d2cSdrh   return rc;
961eb5a549eSdrh #else
962eb5a549eSdrh   return SQLITE_OK;
963eb5a549eSdrh #endif
964724b1896Sdrh }
965