4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
12 ** A tokenizer for SQL
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
20 #include "sqliteInt.h"
26 ** The sqlite3KeywordCode function looks up an identifier to determine if
27 ** it is a keyword. If it is a keyword, the token code of that keyword is
28 ** returned. If the input is not a keyword, TK_ID is returned.
30 ** The implementation of this routine was generated by a program,
31 ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
32 ** The output of the mkkeywordhash.c program is written into a file
33 ** named keywordhash.h and then included into this source file by
34 ** the #include below.
36 #include "keywordhash.h"
40 ** If X is a character that can be used in an identifier and
41 ** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then
42 ** X is always an identifier character. (Hence all UTF-8
43 ** characters can be part of an identifier). isIdChar[X] will
44 ** be 0 for every character in the lower 128 ASCII characters
45 ** that cannot be used as part of an identifier.
47 ** In this implementation, an identifier can be a string of
48 ** alphabetic characters, digits, and "_" plus any character
49 ** with the high-order bit set. The latter rule means that
50 ** any sequence of UTF-8 characters or characters taken from
51 ** an extended ISO8859 character set can form an identifier.
53 ** Ticket #1066. The SQL standard does not allow '$' in the
54 ** middle of identifiers. But many SQL implementations do.
55 ** SQLite will allow '$' in identifiers for compatibility.
56 ** But the feature is undocumented.
58 static const char isIdChar
[] = {
59 /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
60 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
62 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
64 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
/*
** IdChar(C) is true when C can appear in an identifier: either its
** high-order bit is set, or it is greater than 0x1f and its entry in
** isIdChar[] (indexed from 0x20) is non-zero.
**
** NOTE: the macro assigns its argument to a variable named "c", so a
** suitable "c" must be in scope wherever the macro is used. The argument
** is expanded without parentheses.
*/
68 #define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20]))
71 ** Return the length of the token that begins at z[0].
72 ** Store the token type in *tokenType before returning.
74 static int getToken(const unsigned char *z
, int *tokenType
){
77 case ' ': case '\t': case '\n': case '\f': case '\r': {
78 for(i
=1; isspace(z
[i
]); i
++){}
79 *tokenType
= TK_SPACE
;
84 for(i
=2; (c
=z
[i
])!=0 && c
!='\n'; i
++){}
85 *tokenType
= TK_COMMENT
;
88 *tokenType
= TK_MINUS
;
100 *tokenType
= TK_SEMI
;
104 *tokenType
= TK_PLUS
;
108 *tokenType
= TK_STAR
;
112 if( z
[1]!='*' || z
[2]==0 ){
113 *tokenType
= TK_SLASH
;
116 for(i
=3, c
=z
[2]; (c
!='*' || z
[i
]!='/') && (c
=z
[i
])!=0; i
++){}
118 *tokenType
= TK_COMMENT
;
127 return 1 + (z
[1]=='=');
137 *tokenType
= TK_LSHIFT
;
149 *tokenType
= TK_RSHIFT
;
158 *tokenType
= TK_ILLEGAL
;
167 *tokenType
= TK_BITOR
;
170 *tokenType
= TK_CONCAT
;
175 *tokenType
= TK_COMMA
;
179 *tokenType
= TK_BITAND
;
183 *tokenType
= TK_BITNOT
;
187 for(i
=1; isdigit(z
[i
]) || (i
==1 && z
[1]=='-'); i
++){}
188 *tokenType
= TK_REGISTER
;
191 case '\'': case '"': {
193 for(i
=1; (c
=z
[i
])!=0; i
++){
203 *tokenType
= TK_STRING
;
210 case '0': case '1': case '2': case '3': case '4':
211 case '5': case '6': case '7': case '8': case '9': {
212 *tokenType
= TK_INTEGER
;
213 for(i
=1; isdigit(z
[i
]); i
++){}
214 #ifndef SQLITE_OMIT_FLOATING_POINT
215 if( z
[i
]=='.' && isdigit(z
[i
+1]) ){
217 while( isdigit(z
[i
]) ){ i
++; }
218 *tokenType
= TK_FLOAT
;
220 if( (z
[i
]=='e' || z
[i
]=='E') &&
222 || ((z
[i
+1]=='+' || z
[i
+1]=='-') && isdigit(z
[i
+2]))
226 while( isdigit(z
[i
]) ){ i
++; }
227 *tokenType
= TK_FLOAT
;
233 for(i
=1, c
=z
[0]; c
!=']' && (c
=z
[i
])!=0; i
++){}
238 *tokenType
= TK_VARIABLE
;
239 for(i
=1; isdigit(z
[i
]); i
++){}
243 for(i
=1; IdChar(z
[i
]); i
++){}
244 *tokenType
= i
>1 ? TK_VARIABLE
: TK_ILLEGAL
;
247 #ifndef SQLITE_OMIT_TCL_VARIABLE
249 *tokenType
= TK_VARIABLE
;
252 for(i
=2; (c
=z
[i
])!=0 && nBrace
; i
++){
259 if( c
==0 ) *tokenType
= TK_ILLEGAL
;
262 for(i
=1; (c
=z
[i
])!=0; i
++){
263 if( isalnum(c
) || c
=='_' ){
265 }else if( c
=='(' && n
>0 ){
268 }while( (c
=z
[i
])!=0 && !isspace(c
) && c
!=')' );
272 *tokenType
= TK_ILLEGAL
;
275 }else if( c
==':' && z
[i
+1]==':' ){
281 if( n
==0 ) *tokenType
= TK_ILLEGAL
;
286 #ifndef SQLITE_OMIT_BLOB_LITERAL
287 case 'x': case 'X': {
288 if( (c
=z
[1])=='\'' || c
=='"' ){
290 *tokenType
= TK_BLOB
;
291 for(i
=2; (c
=z
[i
])!=0; i
++){
293 if( i
%2 ) *tokenType
= TK_ILLEGAL
;
297 *tokenType
= TK_ILLEGAL
;
304 /* Otherwise fall through to the next case */
311 for(i
=1; IdChar(z
[i
]); i
++){}
312 *tokenType
= keywordCode((char*)z
, i
);
316 *tokenType
= TK_ILLEGAL
;
319 int sqlite3GetToken(const unsigned char *z
, int *tokenType
){
320 return getToken(z
, tokenType
);
324 ** Run the parser on the given SQL string. The parser structure is
325 ** passed in. An SQLITE_ status code is returned. If an error occurs
326 ** and pzErrMsg!=NULL then an error message might be written into
327 ** memory obtained from malloc() and *pzErrMsg made to point to that
328 ** error message. Or maybe not.
330 int sqlite3RunParser(Parse
*pParse
, const char *zSql
, STRPTR
*pzErrMsg
){
335 int lastTokenParsed
= -1;
336 sqlite3
*db
= pParse
->db
;
337 extern void *sqlite3ParserAlloc(void*(*)(int));
338 extern void sqlite3ParserFree(void*, void(*)(void*));
339 extern int sqlite3Parser(void*, int, Token
, Parse
*);
341 db
->flags
&= ~SQLITE_Interrupt
;
342 pParse
->rc
= SQLITE_OK
;
344 pEngine
= sqlite3ParserAlloc((void*(*)(int))sqlite3MallocX
);
346 sqlite3SetString(pzErrMsg
, "out of memory", (char*)0);
349 assert( pParse
->sLastToken
.dyn
==0 );
350 assert( pParse
->pNewTable
==0 );
351 assert( pParse
->pNewTrigger
==0 );
352 assert( pParse
->nVar
==0 );
353 assert( pParse
->nVarExpr
==0 );
354 assert( pParse
->nVarExprAlloc
==0 );
355 assert( pParse
->apVarExpr
==0 );
356 pParse
->zTail
= pParse
->zSql
= zSql
;
357 while( sqlite3_malloc_failed
==0 && zSql
[i
]!=0 ){
359 pParse
->sLastToken
.z
= &zSql
[i
];
360 assert( pParse
->sLastToken
.dyn
==0 );
361 pParse
->sLastToken
.n
= getToken((unsigned char*)&zSql
[i
],&tokenType
);
362 i
+= pParse
->sLastToken
.n
;
366 if( (db
->flags
& SQLITE_Interrupt
)!=0 ){
367 pParse
->rc
= SQLITE_INTERRUPT
;
368 sqlite3SetString(pzErrMsg
, "interrupt", (char*)0);
375 sqliteFree(*pzErrMsg
);
376 *pzErrMsg
= sqlite3MPrintf("unrecognized token: \"%T\"",
377 &pParse
->sLastToken
);
383 pParse
->zTail
= &zSql
[i
];
384 /* Fall thru into the default case */
387 sqlite3Parser(pEngine
, tokenType
, pParse
->sLastToken
, pParse
);
388 lastTokenParsed
= tokenType
;
389 if( pParse
->rc
!=SQLITE_OK
){
397 if( zSql
[i
]==0 && nErr
==0 && pParse
->rc
==SQLITE_OK
){
398 if( lastTokenParsed
!=TK_SEMI
){
399 sqlite3Parser(pEngine
, TK_SEMI
, pParse
->sLastToken
, pParse
);
400 pParse
->zTail
= &zSql
[i
];
402 sqlite3Parser(pEngine
, 0, pParse
->sLastToken
, pParse
);
404 sqlite3ParserFree(pEngine
, sqlite3FreeX
);
405 if( sqlite3_malloc_failed
){
406 pParse
->rc
= SQLITE_NOMEM
;
408 if( pParse
->rc
!=SQLITE_OK
&& pParse
->rc
!=SQLITE_DONE
&& pParse
->zErrMsg
==0 ){
409 sqlite3SetString(&pParse
->zErrMsg
, sqlite3ErrStr(pParse
->rc
),
412 if( pParse
->zErrMsg
){
413 if( pzErrMsg
&& *pzErrMsg
==0 ){
414 *pzErrMsg
= pParse
->zErrMsg
;
416 sqliteFree(pParse
->zErrMsg
);
421 if( pParse
->pVdbe
&& pParse
->nErr
>0 && pParse
->nested
==0 ){
422 sqlite3VdbeDelete(pParse
->pVdbe
);
425 sqlite3DeleteTable(pParse
->db
, pParse
->pNewTable
);
426 sqlite3DeleteTrigger(pParse
->pNewTrigger
);
427 sqliteFree(pParse
->apVarExpr
);
428 if( nErr
>0 && (pParse
->rc
==SQLITE_OK
|| pParse
->rc
==SQLITE_DONE
) ){
429 pParse
->rc
= SQLITE_ERROR
;
434 /* The sqlite3_complete() API may be omitted (to save code space) by
435 ** defining the following symbol.
437 #ifndef SQLITE_OMIT_COMPLETE
440 ** Token types used by the sqlite3_complete() routine. See the header
441 ** comments on that procedure for additional information.
453 ** Return TRUE if the given SQL string ends in a semicolon.
455 ** Special handling is required for CREATE TRIGGER statements.
456 ** Whenever the CREATE TRIGGER keywords are seen, the statement
457 ** must end with ";END;".
459 ** This implementation uses a state machine with 7 states:
461 ** (0) START At the beginning or end of an SQL statement. This routine
462 ** returns 1 if it ends in the START state and 0 if it ends
463 ** in any other state.
465 ** (1) NORMAL We are in the middle of a statement which ends with a single
468 ** (2) EXPLAIN The keyword EXPLAIN has been seen at the beginning of
471 ** (3) CREATE The keyword CREATE has been seen at the beginning of a
472 ** statement, possibly preceded by EXPLAIN and/or followed by
475 ** (4) TRIGGER We are in the middle of a trigger definition that must be
476 ** ended by a semicolon, the keyword END, and another semicolon.
478 ** (5) SEMI We've seen the first semicolon in the ";END;" that occurs at
479 ** the end of a trigger definition.
481 ** (6) END We've seen the ";END" of the ";END;" that occurs at the end
482 ** of a trigger definition.
484 ** Transitions between states above are determined by tokens extracted
485 ** from the input. The following tokens are significant:
487 ** (0) tkSEMI A semicolon.
488 ** (1) tkWS Whitespace
489 ** (2) tkOTHER Any other SQL token.
490 ** (3) tkEXPLAIN The "explain" keyword.
491 ** (4) tkCREATE The "create" keyword.
492 ** (5) tkTEMP The "temp" or "temporary" keyword.
493 ** (6) tkTRIGGER The "trigger" keyword.
494 ** (7) tkEND The "end" keyword.
496 ** Whitespace never causes a state transition and is always ignored.
498 ** If we compile with SQLITE_OMIT_TRIGGER, all of the computation needed
499 ** to recognize the end of a trigger can be omitted. All we have to do
500 ** is look for a semicolon that is not part of a string or comment.
502 int sqlite3_complete(const char *zSql
){
503 u8 state
= 0; /* Current state, using numbers defined in header comment */
504 u8 token
; /* Value of the next token */
506 #ifndef SQLITE_OMIT_TRIGGER
507 /* A complex statement machine used to detect the end of a CREATE TRIGGER
508 ** statement. This is the normal case.
510 static const u8 trans
[7][8] = {
512 /* State: ** SEMI WS OTHER EXPLAIN CREATE TEMP TRIGGER END */
513 /* 0 START: */ { 0, 0, 1, 2, 3, 1, 1, 1, },
514 /* 1 NORMAL: */ { 0, 1, 1, 1, 1, 1, 1, 1, },
515 /* 2 EXPLAIN: */ { 0, 2, 1, 1, 3, 1, 1, 1, },
516 /* 3 CREATE: */ { 0, 3, 1, 1, 1, 3, 4, 1, },
517 /* 4 TRIGGER: */ { 5, 4, 4, 4, 4, 4, 4, 4, },
518 /* 5 SEMI: */ { 5, 5, 4, 4, 4, 4, 4, 6, },
519 /* 6 END: */ { 0, 6, 4, 4, 4, 4, 4, 4, },
522 /* If triggers are not supported by this compile then the state machine
523 ** used to detect the end of a statement is much simpler
525 static const u8 trans
[2][3] = {
527 /* State: ** SEMI WS OTHER */
528 /* 0 START: */ { 0, 0, 1, },
529 /* 1 NORMAL: */ { 0, 1, 1, },
531 #endif /* SQLITE_OMIT_TRIGGER */
535 case ';': { /* A semicolon */
543 case '\f': { /* White space is ignored */
547 case '/': { /* C-style comments */
553 while( zSql
[0] && (zSql
[0]!='*' || zSql
[1]!='/') ){ zSql
++; }
554 if( zSql
[0]==0 ) return 0;
559 case '-': { /* SQL-style comments from "--" to end of line */
564 while( *zSql
&& *zSql
!='\n' ){ zSql
++; }
565 if( *zSql
==0 ) return state
==0;
569 case '[': { /* Microsoft-style identifiers in [...] */
571 while( *zSql
&& *zSql
!=']' ){ zSql
++; }
572 if( *zSql
==0 ) return 0;
576 case '"': /* single- and double-quoted strings */
580 while( *zSql
&& *zSql
!=c
){ zSql
++; }
581 if( *zSql
==0 ) return 0;
587 if( IdChar((u8
)*zSql
) ){
588 /* Keywords and unquoted identifiers */
590 for(nId
=1; IdChar(zSql
[nId
]); nId
++){}
591 #ifdef SQLITE_OMIT_TRIGGER
595 case 'c': case 'C': {
596 if( nId
==6 && sqlite3StrNICmp(zSql
, "create", 6)==0 ){
603 case 't': case 'T': {
604 if( nId
==7 && sqlite3StrNICmp(zSql
, "trigger", 7)==0 ){
606 }else if( nId
==4 && sqlite3StrNICmp(zSql
, "temp", 4)==0 ){
608 }else if( nId
==9 && sqlite3StrNICmp(zSql
, "temporary", 9)==0 ){
615 case 'e': case 'E': {
616 if( nId
==3 && sqlite3StrNICmp(zSql
, "end", 3)==0 ){
619 #ifndef SQLITE_OMIT_EXPLAIN
620 if( nId
==7 && sqlite3StrNICmp(zSql
, "explain", 7)==0 ){
634 #endif /* SQLITE_OMIT_TRIGGER */
637 /* Operators and special symbols */
643 state
= trans
[state
][token
];
649 #ifndef SQLITE_OMIT_UTF16
651 ** This routine is the same as the sqlite3_complete() routine described
652 ** above, except that the parameter is required to be UTF-16 encoded, not
655 int sqlite3_complete16(const void *zSql
){
660 pVal
= sqlite3ValueNew();
661 sqlite3ValueSetStr(pVal
, -1, zSql
, SQLITE_UTF16NATIVE
, SQLITE_STATIC
);
662 zSql8
= sqlite3ValueText(pVal
, SQLITE_UTF8
);
664 rc
= sqlite3_complete(zSql8
);
666 sqlite3ValueFree(pVal
);
669 #endif /* SQLITE_OMIT_UTF16 */
670 #endif /* SQLITE_OMIT_COMPLETE */