New assert() statements to validate OP_OpenEphemeral logic.
[sqlite.git] / ext / misc / csv.c
blobb38500f4b92939d77ac74dada25ad7575dcebaf4
1 /*
2 ** 2016-05-28
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This file contains the implementation of an SQLite virtual table for
14 ** reading CSV files.
16 ** Usage:
18 ** .load ./csv
19 ** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
20 ** SELECT * FROM csv;
22 ** The columns are named "c1", "c2", "c3", ... by default. Or the
23 ** application can define its own CREATE TABLE statement using the
24 ** schema= parameter, like this:
26 ** CREATE VIRTUAL TABLE temp.csv2 USING csv(
27 ** filename = "../http.log",
28 ** schema = "CREATE TABLE x(date,ipaddr,url,referrer,userAgent)"
29 ** );
31 ** Instead of specifying a file, the text of the CSV can be loaded using
32 ** the data= parameter.
34 ** If the columns=N parameter is supplied, then the CSV file is assumed to have
35 ** N columns. If both the columns= and schema= parameters are omitted, then
36 ** the number and names of the columns is determined by the first line of
37 ** the CSV input.
39 ** Some extra debugging features (used for testing virtual tables) are available
40 ** if this module is compiled with -DSQLITE_TEST.
42 #include <sqlite3ext.h>
43 SQLITE_EXTENSION_INIT1
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <stdarg.h>
48 #include <ctype.h>
49 #include <stdio.h>
51 #ifndef SQLITE_OMIT_VIRTUALTABLE
54 ** A macro to hint to the compiler that a function should not be
55 ** inlined.
57 #if defined(__GNUC__)
58 # define CSV_NOINLINE __attribute__((noinline))
59 #elif defined(_MSC_VER) && _MSC_VER>=1310
60 # define CSV_NOINLINE __declspec(noinline)
61 #else
62 # define CSV_NOINLINE
63 #endif
66 /* Max size of the error message in a CsvReader */
67 #define CSV_MXERR 200
69 /* Size of the CsvReader input buffer */
70 #define CSV_INBUFSZ 1024
72 /* A context object used when read a CSV file. */
73 typedef struct CsvReader CsvReader;
74 struct CsvReader {
75 FILE *in; /* Read the CSV text from this input stream */
76 char *z; /* Accumulated text for a field */
77 int n; /* Number of bytes in z */
78 int nAlloc; /* Space allocated for z[] */
79 int nLine; /* Current line number */
80 int bNotFirst; /* True if prior text has been seen */
81 int cTerm; /* Character that terminated the most recent field */
82 size_t iIn; /* Next unread character in the input buffer */
83 size_t nIn; /* Number of characters in the input buffer */
84 char *zIn; /* The input buffer */
85 char zErr[CSV_MXERR]; /* Error message */
88 /* Initialize a CsvReader object */
89 static void csv_reader_init(CsvReader *p){
90 p->in = 0;
91 p->z = 0;
92 p->n = 0;
93 p->nAlloc = 0;
94 p->nLine = 0;
95 p->bNotFirst = 0;
96 p->nIn = 0;
97 p->zIn = 0;
98 p->zErr[0] = 0;
101 /* Close and reset a CsvReader object */
102 static void csv_reader_reset(CsvReader *p){
103 if( p->in ){
104 fclose(p->in);
105 sqlite3_free(p->zIn);
107 sqlite3_free(p->z);
108 csv_reader_init(p);
111 /* Report an error on a CsvReader */
112 static void csv_errmsg(CsvReader *p, const char *zFormat, ...){
113 va_list ap;
114 va_start(ap, zFormat);
115 sqlite3_vsnprintf(CSV_MXERR, p->zErr, zFormat, ap);
116 va_end(ap);
119 /* Open the file associated with a CsvReader
120 ** Return the number of errors.
122 static int csv_reader_open(
123 CsvReader *p, /* The reader to open */
124 const char *zFilename, /* Read from this filename */
125 const char *zData /* ... or use this data */
127 if( zFilename ){
128 p->zIn = sqlite3_malloc( CSV_INBUFSZ );
129 if( p->zIn==0 ){
130 csv_errmsg(p, "out of memory");
131 return 1;
133 p->in = fopen(zFilename, "rb");
134 if( p->in==0 ){
135 sqlite3_free(p->zIn);
136 csv_reader_reset(p);
137 csv_errmsg(p, "cannot open '%s' for reading", zFilename);
138 return 1;
140 }else{
141 assert( p->in==0 );
142 p->zIn = (char*)zData;
143 p->nIn = strlen(zData);
145 return 0;
148 /* The input buffer has overflowed. Refill the input buffer, then
149 ** return the next character
151 static CSV_NOINLINE int csv_getc_refill(CsvReader *p){
152 size_t got;
154 assert( p->iIn>=p->nIn ); /* Only called on an empty input buffer */
155 assert( p->in!=0 ); /* Only called if reading froma file */
157 got = fread(p->zIn, 1, CSV_INBUFSZ, p->in);
158 if( got==0 ) return EOF;
159 p->nIn = got;
160 p->iIn = 1;
161 return p->zIn[0];
164 /* Return the next character of input. Return EOF at end of input. */
165 static int csv_getc(CsvReader *p){
166 if( p->iIn >= p->nIn ){
167 if( p->in!=0 ) return csv_getc_refill(p);
168 return EOF;
170 return ((unsigned char*)p->zIn)[p->iIn++];
173 /* Increase the size of p->z and append character c to the end.
174 ** Return 0 on success and non-zero if there is an OOM error */
175 static CSV_NOINLINE int csv_resize_and_append(CsvReader *p, char c){
176 char *zNew;
177 int nNew = p->nAlloc*2 + 100;
178 zNew = sqlite3_realloc64(p->z, nNew);
179 if( zNew ){
180 p->z = zNew;
181 p->nAlloc = nNew;
182 p->z[p->n++] = c;
183 return 0;
184 }else{
185 csv_errmsg(p, "out of memory");
186 return 1;
190 /* Append a single character to the CsvReader.z[] array.
191 ** Return 0 on success and non-zero if there is an OOM error */
192 static int csv_append(CsvReader *p, char c){
193 if( p->n>=p->nAlloc-1 ) return csv_resize_and_append(p, c);
194 p->z[p->n++] = c;
195 return 0;
198 /* Read a single field of CSV text. Compatible with rfc4180 and extended
199 ** with the option of having a separator other than ",".
201 ** + Input comes from p->in.
202 ** + Store results in p->z of length p->n. Space to hold p->z comes
203 ** from sqlite3_malloc64().
204 ** + Keep track of the line number in p->nLine.
205 ** + Store the character that terminates the field in p->cTerm. Store
206 ** EOF on end-of-file.
208 ** Return 0 at EOF or on OOM. On EOF, the p->cTerm character will have
209 ** been set to EOF.
211 static char *csv_read_one_field(CsvReader *p){
212 int c;
213 p->n = 0;
214 c = csv_getc(p);
215 if( c==EOF ){
216 p->cTerm = EOF;
217 return 0;
219 if( c=='"' ){
220 int pc, ppc;
221 int startLine = p->nLine;
222 pc = ppc = 0;
223 while( 1 ){
224 c = csv_getc(p);
225 if( c<='"' || pc=='"' ){
226 if( c=='\n' ) p->nLine++;
227 if( c=='"' ){
228 if( pc=='"' ){
229 pc = 0;
230 continue;
233 if( (c==',' && pc=='"')
234 || (c=='\n' && pc=='"')
235 || (c=='\n' && pc=='\r' && ppc=='"')
236 || (c==EOF && pc=='"')
238 do{ p->n--; }while( p->z[p->n]!='"' );
239 p->cTerm = (char)c;
240 break;
242 if( pc=='"' && c!='\r' ){
243 csv_errmsg(p, "line %d: unescaped %c character", p->nLine, '"');
244 break;
246 if( c==EOF ){
247 csv_errmsg(p, "line %d: unterminated %c-quoted field\n",
248 startLine, '"');
249 p->cTerm = (char)c;
250 break;
253 if( csv_append(p, (char)c) ) return 0;
254 ppc = pc;
255 pc = c;
257 }else{
258 /* If this is the first field being parsed and it begins with the
259 ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
260 if( (c&0xff)==0xef && p->bNotFirst==0 ){
261 csv_append(p, (char)c);
262 c = csv_getc(p);
263 if( (c&0xff)==0xbb ){
264 csv_append(p, (char)c);
265 c = csv_getc(p);
266 if( (c&0xff)==0xbf ){
267 p->bNotFirst = 1;
268 p->n = 0;
269 return csv_read_one_field(p);
273 while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
274 if( csv_append(p, (char)c) ) return 0;
275 c = csv_getc(p);
277 if( c=='\n' ){
278 p->nLine++;
279 if( p->n>0 && p->z[p->n-1]=='\r' ) p->n--;
281 p->cTerm = (char)c;
283 assert( p->z==0 || p->n<p->nAlloc );
284 if( p->z ) p->z[p->n] = 0;
285 p->bNotFirst = 1;
286 return p->z;
290 /* Forward references to the various virtual table methods implemented
291 ** in this file. */
292 static int csvtabCreate(sqlite3*, void*, int, const char*const*,
293 sqlite3_vtab**,char**);
294 static int csvtabConnect(sqlite3*, void*, int, const char*const*,
295 sqlite3_vtab**,char**);
296 static int csvtabBestIndex(sqlite3_vtab*,sqlite3_index_info*);
297 static int csvtabDisconnect(sqlite3_vtab*);
298 static int csvtabOpen(sqlite3_vtab*, sqlite3_vtab_cursor**);
299 static int csvtabClose(sqlite3_vtab_cursor*);
300 static int csvtabFilter(sqlite3_vtab_cursor*, int idxNum, const char *idxStr,
301 int argc, sqlite3_value **argv);
302 static int csvtabNext(sqlite3_vtab_cursor*);
303 static int csvtabEof(sqlite3_vtab_cursor*);
304 static int csvtabColumn(sqlite3_vtab_cursor*,sqlite3_context*,int);
305 static int csvtabRowid(sqlite3_vtab_cursor*,sqlite3_int64*);
307 /* An instance of the CSV virtual table */
308 typedef struct CsvTable {
309 sqlite3_vtab base; /* Base class. Must be first */
310 char *zFilename; /* Name of the CSV file */
311 char *zData; /* Raw CSV data in lieu of zFilename */
312 long iStart; /* Offset to start of data in zFilename */
313 int nCol; /* Number of columns in the CSV file */
314 unsigned int tstFlags; /* Bit values used for testing */
315 } CsvTable;
317 /* Allowed values for tstFlags */
318 #define CSVTEST_FIDX 0x0001 /* Pretend that constrained searchs cost less*/
320 /* A cursor for the CSV virtual table */
321 typedef struct CsvCursor {
322 sqlite3_vtab_cursor base; /* Base class. Must be first */
323 CsvReader rdr; /* The CsvReader object */
324 char **azVal; /* Value of the current row */
325 int *aLen; /* Length of each entry */
326 sqlite3_int64 iRowid; /* The current rowid. Negative for EOF */
327 } CsvCursor;
329 /* Transfer error message text from a reader into a CsvTable */
330 static void csv_xfer_error(CsvTable *pTab, CsvReader *pRdr){
331 sqlite3_free(pTab->base.zErrMsg);
332 pTab->base.zErrMsg = sqlite3_mprintf("%s", pRdr->zErr);
336 ** This method is the destructor fo a CsvTable object.
338 static int csvtabDisconnect(sqlite3_vtab *pVtab){
339 CsvTable *p = (CsvTable*)pVtab;
340 sqlite3_free(p->zFilename);
341 sqlite3_free(p->zData);
342 sqlite3_free(p);
343 return SQLITE_OK;
346 /* Skip leading whitespace. Return a pointer to the first non-whitespace
347 ** character, or to the zero terminator if the string has only whitespace */
348 static const char *csv_skip_whitespace(const char *z){
349 while( isspace((unsigned char)z[0]) ) z++;
350 return z;
353 /* Remove trailing whitespace from the end of string z[] */
354 static void csv_trim_whitespace(char *z){
355 size_t n = strlen(z);
356 while( n>0 && isspace((unsigned char)z[n]) ) n--;
357 z[n] = 0;
360 /* Dequote the string */
361 static void csv_dequote(char *z){
362 int j;
363 char cQuote = z[0];
364 size_t i, n;
366 if( cQuote!='\'' && cQuote!='"' ) return;
367 n = strlen(z);
368 if( n<2 || z[n-1]!=z[0] ) return;
369 for(i=1, j=0; i<n-1; i++){
370 if( z[i]==cQuote && z[i+1]==cQuote ) i++;
371 z[j++] = z[i];
373 z[j] = 0;
376 /* Check to see if the string is of the form: "TAG = VALUE" with optional
377 ** whitespace before and around tokens. If it is, return a pointer to the
378 ** first character of VALUE. If it is not, return NULL.
380 static const char *csv_parameter(const char *zTag, int nTag, const char *z){
381 z = csv_skip_whitespace(z);
382 if( strncmp(zTag, z, nTag)!=0 ) return 0;
383 z = csv_skip_whitespace(z+nTag);
384 if( z[0]!='=' ) return 0;
385 return csv_skip_whitespace(z+1);
388 /* Decode a parameter that requires a dequoted string.
390 ** Return 1 if the parameter is seen, or 0 if not. 1 is returned
391 ** even if there is an error. If an error occurs, then an error message
392 ** is left in p->zErr. If there are no errors, p->zErr[0]==0.
394 static int csv_string_parameter(
395 CsvReader *p, /* Leave the error message here, if there is one */
396 const char *zParam, /* Parameter we are checking for */
397 const char *zArg, /* Raw text of the virtual table argment */
398 char **pzVal /* Write the dequoted string value here */
400 const char *zValue;
401 zValue = csv_parameter(zParam,(int)strlen(zParam),zArg);
402 if( zValue==0 ) return 0;
403 p->zErr[0] = 0;
404 if( *pzVal ){
405 csv_errmsg(p, "more than one '%s' parameter", zParam);
406 return 1;
408 *pzVal = sqlite3_mprintf("%s", zValue);
409 if( *pzVal==0 ){
410 csv_errmsg(p, "out of memory");
411 return 1;
413 csv_trim_whitespace(*pzVal);
414 csv_dequote(*pzVal);
415 return 1;
419 /* Return 0 if the argument is false and 1 if it is true. Return -1 if
420 ** we cannot really tell.
422 static int csv_boolean(const char *z){
423 if( sqlite3_stricmp("yes",z)==0
424 || sqlite3_stricmp("on",z)==0
425 || sqlite3_stricmp("true",z)==0
426 || (z[0]=='1' && z[1]==0)
428 return 1;
430 if( sqlite3_stricmp("no",z)==0
431 || sqlite3_stricmp("off",z)==0
432 || sqlite3_stricmp("false",z)==0
433 || (z[0]=='0' && z[1]==0)
435 return 0;
437 return -1;
440 /* Check to see if the string is of the form: "TAG = BOOLEAN" or just "TAG".
441 ** If it is, set *pValue to be the value of the boolean ("true" if there is
442 ** not "= BOOLEAN" component) and return non-zero. If the input string
443 ** does not begin with TAG, return zero.
445 static int csv_boolean_parameter(
446 const char *zTag, /* Tag we are looking for */
447 int nTag, /* Size of the tag in bytes */
448 const char *z, /* Input parameter */
449 int *pValue /* Write boolean value here */
451 int b;
452 z = csv_skip_whitespace(z);
453 if( strncmp(zTag, z, nTag)!=0 ) return 0;
454 z = csv_skip_whitespace(z + nTag);
455 if( z[0]==0 ){
456 *pValue = 1;
457 return 1;
459 if( z[0]!='=' ) return 0;
460 z = csv_skip_whitespace(z+1);
461 b = csv_boolean(z);
462 if( b>=0 ){
463 *pValue = b;
464 return 1;
466 return 0;
470 ** Parameters:
471 ** filename=FILENAME Name of file containing CSV content
472 ** data=TEXT Direct CSV content.
473 ** schema=SCHEMA Alternative CSV schema.
474 ** header=YES|NO First row of CSV defines the names of
475 ** columns if "yes". Default "no".
476 ** columns=N Assume the CSV file contains N columns.
478 ** Only available if compiled with SQLITE_TEST:
480 ** testflags=N Bitmask of test flags. Optional
482 ** If schema= is omitted, then the columns are named "c0", "c1", "c2",
483 ** and so forth. If columns=N is omitted, then the file is opened and
484 ** the number of columns in the first row is counted to determine the
485 ** column count. If header=YES, then the first row is skipped.
487 static int csvtabConnect(
488 sqlite3 *db,
489 void *pAux,
490 int argc, const char *const*argv,
491 sqlite3_vtab **ppVtab,
492 char **pzErr
494 CsvTable *pNew = 0; /* The CsvTable object to construct */
495 int bHeader = -1; /* header= flags. -1 means not seen yet */
496 int rc = SQLITE_OK; /* Result code from this routine */
497 int i, j; /* Loop counters */
498 #ifdef SQLITE_TEST
499 int tstFlags = 0; /* Value for testflags=N parameter */
500 #endif
501 int b; /* Value of a boolean parameter */
502 int nCol = -99; /* Value of the columns= parameter */
503 CsvReader sRdr; /* A CSV file reader used to store an error
504 ** message and/or to count the number of columns */
505 static const char *azParam[] = {
506 "filename", "data", "schema",
508 char *azPValue[3]; /* Parameter values */
509 # define CSV_FILENAME (azPValue[0])
510 # define CSV_DATA (azPValue[1])
511 # define CSV_SCHEMA (azPValue[2])
514 assert( sizeof(azPValue)==sizeof(azParam) );
515 memset(&sRdr, 0, sizeof(sRdr));
516 memset(azPValue, 0, sizeof(azPValue));
517 for(i=3; i<argc; i++){
518 const char *z = argv[i];
519 const char *zValue;
520 for(j=0; j<sizeof(azParam)/sizeof(azParam[0]); j++){
521 if( csv_string_parameter(&sRdr, azParam[j], z, &azPValue[j]) ) break;
523 if( j<sizeof(azParam)/sizeof(azParam[0]) ){
524 if( sRdr.zErr[0] ) goto csvtab_connect_error;
525 }else
526 if( csv_boolean_parameter("header",6,z,&b) ){
527 if( bHeader>=0 ){
528 csv_errmsg(&sRdr, "more than one 'header' parameter");
529 goto csvtab_connect_error;
531 bHeader = b;
532 }else
533 #ifdef SQLITE_TEST
534 if( (zValue = csv_parameter("testflags",9,z))!=0 ){
535 tstFlags = (unsigned int)atoi(zValue);
536 }else
537 #endif
538 if( (zValue = csv_parameter("columns",7,z))!=0 ){
539 if( nCol>0 ){
540 csv_errmsg(&sRdr, "more than one 'columns' parameter");
541 goto csvtab_connect_error;
543 nCol = atoi(zValue);
544 if( nCol<=0 ){
545 csv_errmsg(&sRdr, "column= value must be positive");
546 goto csvtab_connect_error;
548 }else
550 csv_errmsg(&sRdr, "bad parameter: '%s'", z);
551 goto csvtab_connect_error;
554 if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
555 csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
556 goto csvtab_connect_error;
559 if( (nCol<=0 || bHeader==1)
560 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
562 goto csvtab_connect_error;
564 pNew = sqlite3_malloc( sizeof(*pNew) );
565 *ppVtab = (sqlite3_vtab*)pNew;
566 if( pNew==0 ) goto csvtab_connect_oom;
567 memset(pNew, 0, sizeof(*pNew));
568 if( CSV_SCHEMA==0 ){
569 sqlite3_str *pStr = sqlite3_str_new(0);
570 char *zSep = "";
571 int iCol = 0;
572 sqlite3_str_appendf(pStr, "CREATE TABLE x(");
573 if( nCol<0 && bHeader<1 ){
574 nCol = 0;
576 csv_read_one_field(&sRdr);
577 nCol++;
578 }while( sRdr.cTerm==',' );
580 if( nCol>0 && bHeader<1 ){
581 for(iCol=0; iCol<nCol; iCol++){
582 sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
583 zSep = ",";
585 }else{
587 char *z = csv_read_one_field(&sRdr);
588 if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
589 sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
590 zSep = ",";
591 iCol++;
593 }while( sRdr.cTerm==',' );
594 if( nCol<0 ){
595 nCol = iCol;
596 }else{
597 while( iCol<nCol ){
598 sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
599 zSep = ",";
603 pNew->nCol = nCol;
604 sqlite3_str_appendf(pStr, ")");
605 CSV_SCHEMA = sqlite3_str_finish(pStr);
606 if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
607 }else if( nCol<0 ){
609 csv_read_one_field(&sRdr);
610 pNew->nCol++;
611 }while( sRdr.cTerm==',' );
612 }else{
613 pNew->nCol = nCol;
615 pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
616 pNew->zData = CSV_DATA; CSV_DATA = 0;
617 #ifdef SQLITE_TEST
618 pNew->tstFlags = tstFlags;
619 #endif
620 if( bHeader!=1 ){
621 pNew->iStart = 0;
622 }else if( pNew->zData ){
623 pNew->iStart = (int)sRdr.iIn;
624 }else{
625 pNew->iStart = (int)(ftell(sRdr.in) - sRdr.nIn + sRdr.iIn);
627 csv_reader_reset(&sRdr);
628 rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
629 if( rc ){
630 csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
631 goto csvtab_connect_error;
633 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
634 sqlite3_free(azPValue[i]);
636 /* Rationale for DIRECTONLY:
637 ** An attacker who controls a database schema could use this vtab
638 ** to exfiltrate sensitive data from other files in the filesystem.
639 ** And, recommended practice is to put all CSV virtual tables in the
640 ** TEMP namespace, so they should still be usable from within TEMP
641 ** views, so there shouldn't be a serious loss of functionality by
642 ** prohibiting the use of this vtab from persistent triggers and views.
644 sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
645 return SQLITE_OK;
647 csvtab_connect_oom:
648 rc = SQLITE_NOMEM;
649 csv_errmsg(&sRdr, "out of memory");
651 csvtab_connect_error:
652 if( pNew ) csvtabDisconnect(&pNew->base);
653 for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
654 sqlite3_free(azPValue[i]);
656 if( sRdr.zErr[0] ){
657 sqlite3_free(*pzErr);
658 *pzErr = sqlite3_mprintf("%s", sRdr.zErr);
660 csv_reader_reset(&sRdr);
661 if( rc==SQLITE_OK ) rc = SQLITE_ERROR;
662 return rc;
666 ** Reset the current row content held by a CsvCursor.
668 static void csvtabCursorRowReset(CsvCursor *pCur){
669 CsvTable *pTab = (CsvTable*)pCur->base.pVtab;
670 int i;
671 for(i=0; i<pTab->nCol; i++){
672 sqlite3_free(pCur->azVal[i]);
673 pCur->azVal[i] = 0;
674 pCur->aLen[i] = 0;
679 ** The xConnect and xCreate methods do the same thing, but they must be
680 ** different so that the virtual table is not an eponymous virtual table.
682 static int csvtabCreate(
683 sqlite3 *db,
684 void *pAux,
685 int argc, const char *const*argv,
686 sqlite3_vtab **ppVtab,
687 char **pzErr
689 return csvtabConnect(db, pAux, argc, argv, ppVtab, pzErr);
693 ** Destructor for a CsvCursor.
695 static int csvtabClose(sqlite3_vtab_cursor *cur){
696 CsvCursor *pCur = (CsvCursor*)cur;
697 csvtabCursorRowReset(pCur);
698 csv_reader_reset(&pCur->rdr);
699 sqlite3_free(cur);
700 return SQLITE_OK;
704 ** Constructor for a new CsvTable cursor object.
706 static int csvtabOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
707 CsvTable *pTab = (CsvTable*)p;
708 CsvCursor *pCur;
709 size_t nByte;
710 nByte = sizeof(*pCur) + (sizeof(char*)+sizeof(int))*pTab->nCol;
711 pCur = sqlite3_malloc64( nByte );
712 if( pCur==0 ) return SQLITE_NOMEM;
713 memset(pCur, 0, nByte);
714 pCur->azVal = (char**)&pCur[1];
715 pCur->aLen = (int*)&pCur->azVal[pTab->nCol];
716 *ppCursor = &pCur->base;
717 if( csv_reader_open(&pCur->rdr, pTab->zFilename, pTab->zData) ){
718 csv_xfer_error(pTab, &pCur->rdr);
719 return SQLITE_ERROR;
721 return SQLITE_OK;
726 ** Advance a CsvCursor to its next row of input.
727 ** Set the EOF marker if we reach the end of input.
729 static int csvtabNext(sqlite3_vtab_cursor *cur){
730 CsvCursor *pCur = (CsvCursor*)cur;
731 CsvTable *pTab = (CsvTable*)cur->pVtab;
732 int i = 0;
733 char *z;
735 z = csv_read_one_field(&pCur->rdr);
736 if( z==0 ){
737 break;
739 if( i<pTab->nCol ){
740 if( pCur->aLen[i] < pCur->rdr.n+1 ){
741 char *zNew = sqlite3_realloc64(pCur->azVal[i], pCur->rdr.n+1);
742 if( zNew==0 ){
743 csv_errmsg(&pCur->rdr, "out of memory");
744 csv_xfer_error(pTab, &pCur->rdr);
745 break;
747 pCur->azVal[i] = zNew;
748 pCur->aLen[i] = pCur->rdr.n+1;
750 memcpy(pCur->azVal[i], z, pCur->rdr.n+1);
751 i++;
753 }while( pCur->rdr.cTerm==',' );
754 if( z==0 && i==0 ){
755 pCur->iRowid = -1;
756 }else{
757 pCur->iRowid++;
758 while( i<pTab->nCol ){
759 sqlite3_free(pCur->azVal[i]);
760 pCur->azVal[i] = 0;
761 pCur->aLen[i] = 0;
762 i++;
765 return SQLITE_OK;
769 ** Return values of columns for the row at which the CsvCursor
770 ** is currently pointing.
772 static int csvtabColumn(
773 sqlite3_vtab_cursor *cur, /* The cursor */
774 sqlite3_context *ctx, /* First argument to sqlite3_result_...() */
775 int i /* Which column to return */
777 CsvCursor *pCur = (CsvCursor*)cur;
778 CsvTable *pTab = (CsvTable*)cur->pVtab;
779 if( i>=0 && i<pTab->nCol && pCur->azVal[i]!=0 ){
780 sqlite3_result_text(ctx, pCur->azVal[i], -1, SQLITE_TRANSIENT);
782 return SQLITE_OK;
786 ** Return the rowid for the current row.
788 static int csvtabRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
789 CsvCursor *pCur = (CsvCursor*)cur;
790 *pRowid = pCur->iRowid;
791 return SQLITE_OK;
795 ** Return TRUE if the cursor has been moved off of the last
796 ** row of output.
798 static int csvtabEof(sqlite3_vtab_cursor *cur){
799 CsvCursor *pCur = (CsvCursor*)cur;
800 return pCur->iRowid<0;
804 ** Only a full table scan is supported. So xFilter simply rewinds to
805 ** the beginning.
807 static int csvtabFilter(
808 sqlite3_vtab_cursor *pVtabCursor,
809 int idxNum, const char *idxStr,
810 int argc, sqlite3_value **argv
812 CsvCursor *pCur = (CsvCursor*)pVtabCursor;
813 CsvTable *pTab = (CsvTable*)pVtabCursor->pVtab;
814 pCur->iRowid = 0;
816 /* Ensure the field buffer is always allocated. Otherwise, if the
817 ** first field is zero bytes in size, this may be mistaken for an OOM
818 ** error in csvtabNext(). */
819 if( csv_append(&pCur->rdr, 0) ) return SQLITE_NOMEM;
821 if( pCur->rdr.in==0 ){
822 assert( pCur->rdr.zIn==pTab->zData );
823 assert( pTab->iStart>=0 );
824 assert( (size_t)pTab->iStart<=pCur->rdr.nIn );
825 pCur->rdr.iIn = pTab->iStart;
826 }else{
827 fseek(pCur->rdr.in, pTab->iStart, SEEK_SET);
828 pCur->rdr.iIn = 0;
829 pCur->rdr.nIn = 0;
831 return csvtabNext(pVtabCursor);
835 ** Only a forward full table scan is supported. xBestIndex is mostly
836 ** a no-op. If CSVTEST_FIDX is set, then the presence of equality
837 ** constraints lowers the estimated cost, which is fiction, but is useful
838 ** for testing certain kinds of virtual table behavior.
840 static int csvtabBestIndex(
841 sqlite3_vtab *tab,
842 sqlite3_index_info *pIdxInfo
844 pIdxInfo->estimatedCost = 1000000;
845 #ifdef SQLITE_TEST
846 if( (((CsvTable*)tab)->tstFlags & CSVTEST_FIDX)!=0 ){
847 /* The usual (and sensible) case is to always do a full table scan.
848 ** The code in this branch only runs when testflags=1. This code
849 ** generates an artifical and unrealistic plan which is useful
850 ** for testing virtual table logic but is not helpful to real applications.
852 ** Any ==, LIKE, or GLOB constraint is marked as usable by the virtual
853 ** table (even though it is not) and the cost of running the virtual table
854 ** is reduced from 1 million to just 10. The constraints are *not* marked
855 ** as omittable, however, so the query planner should still generate a
856 ** plan that gives a correct answer, even if they plan is not optimal.
858 int i;
859 int nConst = 0;
860 for(i=0; i<pIdxInfo->nConstraint; i++){
861 unsigned char op;
862 if( pIdxInfo->aConstraint[i].usable==0 ) continue;
863 op = pIdxInfo->aConstraint[i].op;
864 if( op==SQLITE_INDEX_CONSTRAINT_EQ
865 || op==SQLITE_INDEX_CONSTRAINT_LIKE
866 || op==SQLITE_INDEX_CONSTRAINT_GLOB
868 pIdxInfo->estimatedCost = 10;
869 pIdxInfo->aConstraintUsage[nConst].argvIndex = nConst+1;
870 nConst++;
874 #endif
875 return SQLITE_OK;
879 static sqlite3_module CsvModule = {
880 0, /* iVersion */
881 csvtabCreate, /* xCreate */
882 csvtabConnect, /* xConnect */
883 csvtabBestIndex, /* xBestIndex */
884 csvtabDisconnect, /* xDisconnect */
885 csvtabDisconnect, /* xDestroy */
886 csvtabOpen, /* xOpen - open a cursor */
887 csvtabClose, /* xClose - close a cursor */
888 csvtabFilter, /* xFilter - configure scan constraints */
889 csvtabNext, /* xNext - advance a cursor */
890 csvtabEof, /* xEof - check for end of scan */
891 csvtabColumn, /* xColumn - read data */
892 csvtabRowid, /* xRowid - read data */
893 0, /* xUpdate */
894 0, /* xBegin */
895 0, /* xSync */
896 0, /* xCommit */
897 0, /* xRollback */
898 0, /* xFindMethod */
899 0, /* xRename */
900 0, /* xSavepoint */
901 0, /* xRelease */
902 0, /* xRollbackTo */
903 0, /* xShadowName */
904 0 /* xIntegrity */
907 #ifdef SQLITE_TEST
909 ** For virtual table testing, make a version of the CSV virtual table
910 ** available that has an xUpdate function. But the xUpdate always returns
911 ** SQLITE_READONLY since the CSV file is not really writable.
913 static int csvtabUpdate(sqlite3_vtab *p,int n,sqlite3_value**v,sqlite3_int64*x){
914 return SQLITE_READONLY;
916 static sqlite3_module CsvModuleFauxWrite = {
917 0, /* iVersion */
918 csvtabCreate, /* xCreate */
919 csvtabConnect, /* xConnect */
920 csvtabBestIndex, /* xBestIndex */
921 csvtabDisconnect, /* xDisconnect */
922 csvtabDisconnect, /* xDestroy */
923 csvtabOpen, /* xOpen - open a cursor */
924 csvtabClose, /* xClose - close a cursor */
925 csvtabFilter, /* xFilter - configure scan constraints */
926 csvtabNext, /* xNext - advance a cursor */
927 csvtabEof, /* xEof - check for end of scan */
928 csvtabColumn, /* xColumn - read data */
929 csvtabRowid, /* xRowid - read data */
930 csvtabUpdate, /* xUpdate */
931 0, /* xBegin */
932 0, /* xSync */
933 0, /* xCommit */
934 0, /* xRollback */
935 0, /* xFindMethod */
936 0, /* xRename */
937 0, /* xSavepoint */
938 0, /* xRelease */
939 0, /* xRollbackTo */
940 0, /* xShadowName */
941 0 /* xIntegrity */
943 #endif /* SQLITE_TEST */
945 #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */
948 #ifdef _WIN32
949 __declspec(dllexport)
950 #endif
952 ** This routine is called when the extension is loaded. The new
953 ** CSV virtual table module is registered with the calling database
954 ** connection.
956 int sqlite3_csv_init(
957 sqlite3 *db,
958 char **pzErrMsg,
959 const sqlite3_api_routines *pApi
961 #ifndef SQLITE_OMIT_VIRTUALTABLE
962 int rc;
963 SQLITE_EXTENSION_INIT2(pApi);
964 rc = sqlite3_create_module(db, "csv", &CsvModule, 0);
965 #ifdef SQLITE_TEST
966 if( rc==SQLITE_OK ){
967 rc = sqlite3_create_module(db, "csv_wr", &CsvModuleFauxWrite, 0);
969 #endif
970 return rc;
971 #else
972 return SQLITE_OK;
973 #endif