Allow schema files that are missing checksums on the !!SCHEMAMATIC line.
[versaplex.git] / versaplexd / vxsqltokenizer.cs
blob8511e66141e7f6abc96ff5f982e41b978777c07d
1 /*
2 * Versaplex:
3 * Copyright (C)2007-2008 Versabanq Innovations Inc. and contributors.
4 * See the included file named LICENSE for license information.
5 */
6 using System;
7 using System.Collections.Generic;
8 using Wv;
// One lexical token of a SQL string: its classification, its text, and the
// whitespace that surrounded it in the input.  For quoted, delimited and
// comment tokens, 'name' holds only the text between the markers; ToString()
// puts the markers back.
public class VxSqlToken
{
    public enum TokenType { None, Unquoted, SingleQuoted, DoubleQuoted,
        Delimited, LParen, RParen, Comma, Semicolon,
        Keyword, DelimitedComment, Comment,
        Relop, Not, Bitop, Addop, Multop,
        Numeric, Scientific, Period, ERROR_UNKNOWN };

    // Classification of this token.
    public TokenType type;
    // Token text, without any surrounding quote/bracket/comment markers.
    public string name;
    // Whitespace emitted before the token by ToString().
    public string leading_space;
    // Whitespace emitted after the token by ToString(); empty on construction.
    public string trailing_space;

    // Creates a token of type 't' with text 'n' and leading whitespace 'l'.
    // The trailing whitespace starts out empty.
    public VxSqlToken(TokenType t, string n, string l)
    {
        type = t;
        name = n;
        leading_space = l;
        trailing_space = "";
    }

    // Same test as IsIdentifier(); kept as a separate entry point for
    // existing callers.
    public bool IsValidIdentifier()
    {
        return IsIdentifier();
    }

    // True when the token is neither quoted, delimited nor a comment, and
    // its text equals 'eq' ignoring case.
    public bool NotQuotedAndLowercaseEq(string eq)
    {
        switch (type)
        {
        case TokenType.SingleQuoted:
        case TokenType.DoubleQuoted:
        case TokenType.Delimited:
        case TokenType.Comment:
        case TokenType.DelimitedComment:
            return false;
        default:
            // Ordinal comparison: SQL words must match the same way under
            // every thread culture (culture-aware casing maps 'i'/'I'
            // differently under Turkish rules).
            return String.Equals(name, eq, StringComparison.OrdinalIgnoreCase);
        }
    }

    // True when the token was classified as a keyword.
    public bool IsKeyword()
    {
        return type == TokenType.Keyword;
    }

    // True when the token is a keyword whose text equals 'key' ignoring
    // case.  The comparison is ordinal, so it does not depend on the
    // current culture.
    public bool IsKeywordEq(string key)
    {
        return IsKeyword()
            && String.Equals(key, name, StringComparison.OrdinalIgnoreCase);
    }

    // True for double-quoted, bracket-delimited and unquoted tokens.
    public bool IsIdentifier()
    {
        switch (type)
        {
        case TokenType.DoubleQuoted:
        case TokenType.Delimited:
        case TokenType.Unquoted:
            return true;
        default:
            return false;
        }
    }

    // True for both "--" comments and "/* */" comments.
    public bool IsComment()
    {
        return type == TokenType.Comment || type == TokenType.DelimitedComment;
    }

    // Rebuilds the token's source text: leading whitespace, the token text
    // wrapped in whatever markers its type implies, then trailing whitespace.
    public override string ToString()
    {
        string text;
        switch (type)
        {
        case TokenType.SingleQuoted:
            text = "'" + name + "'";
            break;
        case TokenType.DoubleQuoted:
            text = "\"" + name + "\"";
            break;
        case TokenType.Delimited:
            text = "[" + name + "]";
            break;
        case TokenType.DelimitedComment:
            text = "/*" + name + "*/";
            break;
        case TokenType.Comment:
            text = "--" + name;
            break;
        default:
            // Everything else, keywords included, is emitted exactly as
            // stored; keyword casing is not normalized.
            text = name;
            break;
        }
        return leading_space + text + trailing_space;
    }

    // Converts a token to its ToString() text.  A null token converts to a
    // null string.
    public static implicit operator string(VxSqlToken t)
    {
        if (t == null)
            return null;
        return t.ToString();
    }
}
// Breaks a SQL string into a list of VxSqlToken objects with a single-pass,
// hand-written state machine.  The whitespace around each token is kept on
// the token itself (see tokenize() for where it is attached).
public class VxSqlTokenizer
{
    // Words that turn an unquoted token into a Keyword token.
    private static string[] sqlkeywords = {
        "ADD", "EXCEPT", "PERCENT",
        "ALL", "EXEC", "PLAN",
        "ALTER", "EXECUTE", "PRECISION",
        "AND", "EXISTS", "PRIMARY",
        "ANY", "EXIT", "PRINT",
        "AS", "FETCH", "PROC",
        "ASC", "FILE", "PROCEDURE",
        "AUTHORIZATION", "FILLFACTOR", "PUBLIC",
        "BACKUP", "FOR", "RAISERROR",
        "BEGIN", "FOREIGN", "READ",
        "BETWEEN", "FREETEXT", "READTEXT",
        "BREAK", "FREETEXTTABLE", "RECONFIGURE",
        "BROWSE", "FROM", "REFERENCES",
        "BULK", "FULL", "REPLICATION",
        "BY", "FUNCTION", "RESTORE",
        "CASCADE", "GOTO", "RESTRICT",
        "CASE", "GRANT", "RETURN",
        "CHECK", "GROUP", "REVOKE",
        "CHECKPOINT", "HAVING", "RIGHT",
        "CLOSE", "HOLDLOCK", "ROLLBACK",
        "CLUSTERED", "IDENTITY", "ROWCOUNT",
        "COALESCE", "IDENTITY_INSERT", "ROWGUIDCOL",
        "COLLATE", "IDENTITYCOL", "RULE",
        "COLUMN", "IF", "SAVE",
        "COMMIT", "IN", "SCHEMA",
        "COMPUTE", "INDEX", "SELECT",
        "CONSTRAINT", "INNER", "SESSION_USER",
        "CONTAINS", "INSERT", "SET",
        "CONTAINSTABLE", "INTERSECT", "SETUSER",
        "CONTINUE", "INTO", "SHUTDOWN",
        "CONVERT", "IS", "SOME",
        "CREATE", "JOIN", "STATISTICS",
        "CROSS", "KEY", "SYSTEM_USER",
        "CURRENT", "KILL", "TABLE",
        "CURRENT_DATE", "LEFT", "TEXTSIZE",
        "CURRENT_TIME", "LIKE", "THEN",
        "CURRENT_TIMESTAMP", "LINENO", "TO",
        "CURRENT_USER", "LOAD", "TOP",
        "CURSOR", "NATIONAL", "TRAN",
        "DATABASE", "NOCHECK", "TRANSACTION",
        "DBCC", "NONCLUSTERED", "TRIGGER",
        "DEALLOCATE", "NOT", "TRUNCATE",
        "DECLARE", "NULL", "TSEQUAL",
        "DEFAULT", "NULLIF", "UNION",
        "DELETE", "OF", "UNIQUE",
        "DENY", "OFF", "UPDATE",
        "DESC", "OFFSETS", "UPDATETEXT",
        "DISK", "ON", "USE",
        "DISTINCT", "OPEN", "USER",
        "DISTRIBUTED", "OPENDATASOURCE", "VALUES",
        "DOUBLE", "OPENQUERY", "VARYING",
        "DROP", "OPENROWSET", "VIEW",
        "DUMMY", "OPENXML", "WAITFOR",
        "DUMP", "OPTION", "WHEN",
        "ELSE", "OR", "WHERE",
        "END", "ORDER", "WHILE",
        "ERRLVL", "OUTER", "WITH",
        "ESCAPE", "OVER", "WRITETEXT",
    };

    // sqlkeywords indexed for constant-time, ordinal case-insensitive
    // lookup.  Must be declared after sqlkeywords: static initializers run
    // in textual order.
    private static readonly Dictionary<string, bool> keyword_lookup
        = build_keyword_lookup();

    // Tokens produced by the most recent tokenize() call; empty until then.
    private List<VxSqlToken> tokens = new List<VxSqlToken>();
    // The token most recently added to 'tokens', or null if none yet.
    private VxSqlToken last;

    // Text accumulated so far for the token being scanned.
    string cur;
    // Whitespace seen since the previous token; becomes the next token's
    // leading space.
    string curspace;
    // Type of the token being scanned; None while between tokens.
    VxSqlToken.TokenType curstate;

    // Builds the case-insensitive keyword index from sqlkeywords.
    private static Dictionary<string, bool> build_keyword_lookup()
    {
        Dictionary<string, bool> lookup
            = new Dictionary<string, bool>(StringComparer.OrdinalIgnoreCase);
        foreach (string word in sqlkeywords)
            lookup[word] = true;
        return lookup;
    }

    // Clears the in-progress token and returns to the between-tokens state.
    private void reset_state()
    {
        cur = "";
        curspace = "";
        curstate = VxSqlToken.TokenType.None;
    }

    // Emits the in-progress token (current type, text and leading
    // whitespace), remembers it as 'last', and starts over.
    private void save_and_reset_state()
    {
        last = new VxSqlToken(curstate, cur, curspace);
        tokens.Add(last);
        reset_state();
    }

    // Maps a character that forms a complete token on its own to that
    // token's type; returns None for any other character.
    private VxSqlToken.TokenType get_singletoken_state(char c)
    {
        switch (c)
        {
        case '(':
            return VxSqlToken.TokenType.LParen;
        case ')':
            return VxSqlToken.TokenType.RParen;
        case '&':
        case '|':
        case '^':
        case '~':
            return VxSqlToken.TokenType.Bitop;
        case '*':
        case '/':
        case '%':
            return VxSqlToken.TokenType.Multop;
        case '+':
        case '-':
            return VxSqlToken.TokenType.Addop;
        case '=':
        case '>':
        case '<':
            return VxSqlToken.TokenType.Relop;
        case '!':
            return VxSqlToken.TokenType.Not;
        case ',':
            return VxSqlToken.TokenType.Comma;
        case ';':
            return VxSqlToken.TokenType.Semicolon;
        case '.':
            return VxSqlToken.TokenType.Period;
        default:
            return VxSqlToken.TokenType.None;
        }
    }

    // Promotes the in-progress unquoted token to Keyword when its text is
    // one of sqlkeywords.  The match ignores case and is ordinal, so it
    // gives the same answer under every thread culture.
    private void decipher_unquoted_state()
    {
        if (keyword_lookup.ContainsKey(cur))
            curstate = VxSqlToken.TokenType.Keyword;
    }

    // Tokenizes 'q', replacing any tokens held from an earlier call.
    //
    // Whitespace before a token is stored as that token's leading_space.
    // Whitespace left over at the end of the input becomes the
    // trailing_space of the final token; if the input produced no token at
    // all, it is stored as the leading_space of a single empty None token.
    // A token still open when the input ends (e.g. an unterminated string)
    // is emitted with whatever text was collected.
    public void tokenize(string q)
    {
        last = null;
        tokens = new List<VxSqlToken>();

        reset_state();

        for (int i = 0; i < q.Length; ++i)
        {
            char c = q[i];
            // One character of lookahead; '\0' stands for end of input.
            char peek = i < q.Length - 1 ? q[i + 1] : '\0';

            switch (curstate)
            {
            case VxSqlToken.TokenType.None:
                VxSqlToken.TokenType singletoken_state
                    = get_singletoken_state(c);
                if (isalpha(c) || c == '_' || c == '@' || c == '#')
                {
                    curstate = VxSqlToken.TokenType.Unquoted;
                    cur += c;
                    if (!isidentifierchar(peek))
                    {
                        decipher_unquoted_state();
                        save_and_reset_state();
                    }
                }
                else if (c == '\'')
                    curstate = VxSqlToken.TokenType.SingleQuoted;
                else if (c == '"')
                    curstate = VxSqlToken.TokenType.DoubleQuoted;
                else if (c == '[')
                    curstate = VxSqlToken.TokenType.Delimited;
                else if (c == '-' && peek == '-')
                {
                    // Tested before the single-character operators so that
                    // "--" starts a comment instead of yielding two Addops.
                    ++i;
                    curstate = VxSqlToken.TokenType.Comment;
                }
                else if (c == '/' && peek == '*')
                {
                    ++i;
                    curstate = VxSqlToken.TokenType.DelimitedComment;
                }
                else if (isdigit(c))
                {
                    curstate = VxSqlToken.TokenType.Numeric;
                    cur += c;
                    if (!isnumericchar(peek))
                        save_and_reset_state();
                }
                else if (c == '.' && isdigit(peek))
                {
                    // A leading '.' followed by a digit starts a number;
                    // any other '.' falls through to Period below.
                    curstate = VxSqlToken.TokenType.Numeric;
                    cur += c;
                }
                else if (singletoken_state != VxSqlToken.TokenType.None)
                {
                    cur += c;
                    curstate = singletoken_state;
                    save_and_reset_state();
                }
                else if (!isspace(c))
                {
                    cur += c;
                    curstate = VxSqlToken.TokenType.ERROR_UNKNOWN;
                    save_and_reset_state();
                }
                else //whitespace
                    curspace += c;
                break;

            case VxSqlToken.TokenType.Unquoted:
                cur += c;
                if (!isidentifierchar(peek))
                {
                    decipher_unquoted_state();
                    save_and_reset_state();
                }
                break;

            case VxSqlToken.TokenType.SingleQuoted:
                if (c == '\'')
                {
                    if (peek == '\'')
                    {
                        // A doubled quote is an escaped quote; it is kept
                        // doubled so ToString() reproduces the input.
                        cur += "''";
                        ++i;
                    }
                    else
                        save_and_reset_state();
                }
                else
                    cur += c;
                break;

            case VxSqlToken.TokenType.DoubleQuoted:
                if (c == '"')
                {
                    if (peek == '"')
                    {
                        cur += "\"\"";
                        ++i;
                    }
                    else
                        save_and_reset_state();
                }
                else
                    cur += c;
                break;

            case VxSqlToken.TokenType.Delimited:
                if (c == ']')
                {
                    if (peek == ']')
                    {
                        // "]]" inside brackets is an escaped ']'; kept
                        // doubled, same as the quote states above.
                        cur += "]]";
                        ++i;
                    }
                    else
                        save_and_reset_state();
                }
                else
                    cur += c;
                break;

            case VxSqlToken.TokenType.Comment:
                // NOTE: In T-Sql, a 'GO' command within a comment generates
                // an error, but our tokenizer doesn't need to care about
                // this.
                if (c == '\n')
                {
                    save_and_reset_state();
                    // The newline is not part of the comment text; it
                    // becomes leading space of whatever comes next.
                    curspace += c;
                }
                else
                    cur += c;
                break;

            case VxSqlToken.TokenType.DelimitedComment:
                // NOTE: In T-Sql, a 'GO' command within a comment generates
                // an error, but our tokenizer doesn't need to care about
                // this.
                if (c == '*' && peek == '/')
                {
                    ++i;
                    save_and_reset_state();
                }
                else
                    cur += c;
                break;

            case VxSqlToken.TokenType.Numeric:
                if (c == 'E' || c == 'e')
                {
                    curstate = VxSqlToken.TokenType.Scientific;
                    // The exponent marker is always stored in lower case.
                    cur += 'e';
                    if (peek == '-' || peek == '+')
                    {
                        // NOTE(review): after a signed exponent the next
                        // character is appended by the Scientific state
                        // without checking that it is a digit.
                        ++i;
                        cur += peek;
                    }
                    else if (!isdigit(peek))
                    {
                        curstate = VxSqlToken.TokenType.Numeric;
                        save_and_reset_state();
                    }
                }
                else
                {
                    cur += c;
                    if (!isnumericchar(peek))
                        save_and_reset_state();
                }
                break;

            case VxSqlToken.TokenType.Scientific:
                cur += c;
                if (!isdigit(peek))
                {
                    // Numbers that finish here are recorded as Numeric; a
                    // token keeps the Scientific type only if the input
                    // ends while the exponent is still open.
                    curstate = VxSqlToken.TokenType.Numeric;
                    save_and_reset_state();
                }
                break;

            default:
                break;
            }
        }

        // Flush a token that was still open when the input ran out.
        if (curstate != VxSqlToken.TokenType.None)
            save_and_reset_state();

        if (curspace != "")
        {
            if (last != null)
                last.trailing_space = curspace;
            else
            {
                // Whitespace-only input: keep it on an empty None token.
                last = new VxSqlToken(VxSqlToken.TokenType.None, "", curspace);
                tokens.Add(last);
            }
        }
    }

    // Builds the query text by passing 'fmt' and 'args' to wv.fmt(), then
    // tokenizes the result.
    public void tokenize(string fmt, params object[] args)
    {
        tokenize(wv.fmt(fmt, args));
    }

    // Creates a tokenizer and immediately tokenizes 'query'.
    public VxSqlTokenizer(string query)
    {
        tokenize(query);
    }

    // Creates a tokenizer holding no tokens; call tokenize() to fill it.
    public VxSqlTokenizer()
    {
    }

    // Returns the tokens from the most recent tokenize() call, or an empty
    // list if tokenize() has not been called.  The list is the tokenizer's
    // own instance, not a copy.
    public List<VxSqlToken> gettokens()
    {
        return tokens;
    }

    // True for any Unicode letter.
    private static bool isalpha(char c)
    {
        return char.IsLetter(c);
    }

    // True for characters that may continue an unquoted token: letters,
    // digits, '@', '#', '_' and '$'.
    private static bool isidentifierchar(char c)
    {
        return char.IsLetterOrDigit(c) || c == '@' || c == '#' || c == '_' ||
            c == '$';
    }

    // True for characters that may continue a numeric token: digits, '.',
    // and the exponent markers 'e' / 'E'.
    private static bool isnumericchar(char c)
    {
        return isdigit(c) || c == '.' || c == 'e' || c == 'E';
    }

    // True for any Unicode whitespace character.
    private static bool isspace(char c)
    {
        return char.IsWhiteSpace(c);
    }

    // True for any Unicode decimal digit.
    private static bool isdigit(char c)
    {
        return char.IsDigit(c);
    }
}
/*
public class Maintenance
{
    public static void Main(string[] args)
    {
        VxSqlTokenizer me = new VxSqlTokenizer();
        me.tokenize("create procedure Func1 as select 'Hello, world, this is Func1!'\n");

        foreach (VxSqlToken t in me.gettokens())
        {
            Console.WriteLine("VxSqlToken: {0}: {1}", t.type, t);
        }
    }
} */