4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 *************************************************************************
12 ** A tokenizer for SQL
14 ** This file contains C code that splits an SQL input string up into
15 ** individual tokens and sends those tokens one-by-one over to the
16 ** parser for analysis.
29 ** All the keywords of the SQL language are stored in a sorted
30 ** table composed of instances of the following structure.
34 const WCHAR
*name
; /* The keyword name */
36 int tokenType
; /* The token value for this keyword */
/*
** Upper bound on the length of a keyword.  The longest entry in the
** keyword table, "LOCALIZABLE", is 11 characters long.
** sqliteKeywordCode() compares the identifier length against this
** limit before it searches the table.
*/
#define MAX_TOKEN_LEN 11
42 ** These are the keywords.
43 ** They MUST be in alphabetical order because the table is searched with bsearch().
/*
** Expands a string literal into two consecutive initializers for a
** keyword table entry: the literal itself, followed by its length in
** characters without the terminating NUL (hence the "- 1").
** Assumes ARRAY_SIZE() yields the element count of an array -- its
** definition is not part of this file section.
*/
#define X(str) str, (ARRAY_SIZE(str) - 1)
46 static const struct keyword aKeywordTable
[] = {
47 { X(L
"ADD"), TK_ADD
},
48 { X(L
"ALTER"), TK_ALTER
},
49 { X(L
"AND"), TK_AND
},
51 { X(L
"CHAR"), TK_CHAR
},
52 { X(L
"CHARACTER"), TK_CHAR
},
53 { X(L
"CREATE"), TK_CREATE
},
54 { X(L
"DELETE"), TK_DELETE
},
55 { X(L
"DISTINCT"), TK_DISTINCT
},
56 { X(L
"DROP"), TK_DROP
},
57 { X(L
"FREE"), TK_FREE
},
58 { X(L
"FROM"), TK_FROM
},
59 { X(L
"HOLD"), TK_HOLD
},
60 { X(L
"INSERT"), TK_INSERT
},
61 { X(L
"INT"), TK_INT
},
62 { X(L
"INTEGER"), TK_INT
},
63 { X(L
"INTO"), TK_INTO
},
65 { X(L
"KEY"), TK_KEY
},
66 { X(L
"LIKE"), TK_LIKE
},
67 { X(L
"LOCALIZABLE"), TK_LOCALIZABLE
},
68 { X(L
"LONG"), TK_LONG
},
69 { X(L
"LONGCHAR"), TK_LONGCHAR
},
70 { X(L
"NOT"), TK_NOT
},
71 { X(L
"NULL"), TK_NULL
},
72 { X(L
"OBJECT"), TK_OBJECT
},
74 { X(L
"ORDER"), TK_ORDER
},
75 { X(L
"PRIMARY"), TK_PRIMARY
},
76 { X(L
"SELECT"), TK_SELECT
},
77 { X(L
"SET"), TK_SET
},
78 { X(L
"SHORT"), TK_SHORT
},
79 { X(L
"TABLE"), TK_TABLE
},
80 { X(L
"TEMPORARY"), TK_TEMPORARY
},
81 { X(L
"UPDATE"), TK_UPDATE
},
82 { X(L
"VALUES"), TK_VALUES
},
83 { X(L
"WHERE"), TK_WHERE
},
88 ** Comparison function for binary search.
90 static int __cdecl
compKeyword(const void *m1
, const void *m2
){
91 const struct keyword
*k1
= m1
, *k2
= m2
;
92 int ret
, len
= min( k1
->len
, k2
->len
);
94 if ((ret
= wcsnicmp( k1
->name
, k2
->name
, len
))) return ret
;
95 if (k1
->len
< k2
->len
) return -1;
96 else if (k1
->len
> k2
->len
) return 1;
101 ** This function looks up an identifier to determine if it is a
102 ** keyword. If it is a keyword, the token code of that keyword is
103 ** returned. If the input is not a keyword, TK_ID is returned.
105 static int sqliteKeywordCode(const WCHAR
*z
, int n
){
106 struct keyword key
, *r
;
108 if( n
>MAX_TOKEN_LEN
)
114 r
= bsearch( &key
, aKeywordTable
, ARRAY_SIZE(aKeywordTable
), sizeof(struct keyword
), compKeyword
);
122 ** If X is a character that can be used in an identifier then
123 ** isIdChar[X] will be 1. Otherwise isIdChar[X] will be 0.
125 ** In this implementation, an identifier can be a string of
126 ** alphabetic characters, digits, and "_" plus any character
127 ** with the high-order bit set. The latter rule means that
128 ** any sequence of UTF-8 characters or characters taken from
129 ** an extended ISO8859 character set can form an identifier.
/*
** isIdChar[X] is 1 when character code X may appear in an identifier
** and 0 otherwise.  There is one entry for every code 0x00..0xFF.
**
** Marked entries: the digits, the ASCII letters, '_' and every code
** from 0x80 to 0xFF.  The table additionally marks 0x05 and '-' (0x2D).
**
** NOTE(review): sqliteGetToken() indexes this table with WCHAR values.
** If WCHAR is wider than 8 bits, a code above 0xFF would read past the
** end of the table -- confirm, or range-check at the call site.
*/
static const char isIdChar[] = {
/*  x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,  /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 8x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 9x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ax */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Bx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Cx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Dx */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Ex */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* Fx */
};
152 ** WCHAR safe version of isdigit()
154 static inline int isDigit(WCHAR c
)
156 return c
>= '0' && c
<= '9';
160 ** WCHAR safe version of isspace(), except '\r'
162 static inline int isSpace(WCHAR c
)
164 return c
== ' ' || c
== '\t' || c
== '\n' || c
== '\f';
168 ** Return the length of the token that begins at z[0]. Return
169 ** -1 if the token is (or might be) incomplete. Store the token
170 ** type in *tokenType before returning.
172 int sqliteGetToken(const WCHAR
*z
, int *tokenType
, int *skip
){
177 case ' ': case '\t': case '\n': case '\f':
178 for(i
=1; isSpace(z
[i
]); i
++){}
179 *tokenType
= TK_SPACE
;
182 if( z
[1]==0 ) return -1;
183 *tokenType
= TK_MINUS
;
192 *tokenType
= TK_STAR
;
201 }else if( z
[1]=='>' ){
218 *tokenType
= TK_ILLEGAL
;
225 *tokenType
= TK_WILDCARD
;
228 *tokenType
= TK_COMMA
;
230 case '`': case '\'': {
240 *tokenType
= TK_STRING
;
244 if( !isDigit(z
[1]) ){
249 case '0': case '1': case '2': case '3': case '4':
250 case '5': case '6': case '7': case '8': case '9':
251 *tokenType
= TK_INTEGER
;
252 for(i
=1; isDigit(z
[i
]); i
++){}
255 for(i
=1; z
[i
] && z
[i
-1]!=']'; i
++){}
262 for(i
=1; isIdChar
[z
[i
]]; i
++){}
263 *tokenType
= sqliteKeywordCode(z
, i
);
264 if( *tokenType
== TK_ID
&& z
[i
] == '`' ) *skip
= 1;
267 *tokenType
= TK_ILLEGAL
;