3 * Copyright (C)2007-2008 Versabanq Innovations Inc. and contributors.
4 * See the included file named LICENSE for license information.
7 using System
.Collections
.Generic
;
10 public class VxSqlToken
// Classification assigned to each token produced by the tokenizer.
// Member order is significant: the underlying integer values follow it.
public enum TokenType
{
    // No token / idle tokenizer state.
    None,

    // Identifier-like and quoted text.
    Unquoted,
    SingleQuoted,       // rendered as '...'
    DoubleQuoted,       // rendered as "..."
    Delimited,          // rendered as [...]

    // Punctuation.
    LParen,
    RParen,
    Comma,
    Semicolon,

    // Unquoted text that matched the SQL keyword table.
    Keyword,

    // Comments.
    DelimitedComment,   // rendered as /*...*/
    Comment,            // rendered as --...

    // Operators.
    Relop,
    Not,
    Bitop,
    Addop,
    Multop,

    // Numbers and the lone period.
    Numeric,
    Scientific,
    Period,

    // Character the tokenizer could not classify.
    ERROR_UNKNOWN
}
// Classification of this token (one of the TokenType values).
17 public TokenType type
;
// Text that ToString() emits in front of the token text.
19 public string leading_space
;
// Text that ToString() emits after the token text; the tokenizer assigns
// its curspace value here for the last token once the input is exhausted.
20 public string trailing_space
;
// Creates a token. The tokenizer calls this as
// new VxSqlToken(curstate, cur, curspace), i.e. t is the token type.
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably n becomes the token text and l the leading whitespace - confirm.
22 public VxSqlToken(TokenType t
, string n
, string l
)
// Reports whether this token may be used as an identifier: true for
// double-quoted, unquoted and [delimited] tokens, false for every other type.
public bool IsValidIdentifier()
{
    switch (type)
    {
        case TokenType.DoubleQuoted:
        case TokenType.Unquoted:
        case TokenType.Delimited:
            return true;
        default:
            return false;
    }
}
// Case-insensitively compares this token's text with eq, but only when the
// token is neither quoted/delimited nor a comment.
//
// eq: text to compare against the token's name.
// Returns false for SingleQuoted, DoubleQuoted, Delimited, Comment and
// DelimitedComment tokens, and when eq is null; otherwise true when the
// token text equals eq ignoring case.
//
// The comparison is ordinal (culture-independent). Lower-casing both sides
// with the current culture breaks SQL words under cultures such as tr-TR,
// where "INSERT" and "insert" do not lower-case to the same string.
public bool NotQuotedAndLowercaseEq(string eq)
{
    switch (type)
    {
        case TokenType.SingleQuoted:
        case TokenType.DoubleQuoted:
        case TokenType.Delimited:
        case TokenType.Comment:
        case TokenType.DelimitedComment:
            return false;
    }

    // A null argument can never match a token; report that instead of
    // failing with a NullReferenceException.
    return eq != null
        && String.Equals(name, eq, StringComparison.OrdinalIgnoreCase);
}
// True when this token's type is TokenType.Keyword.
public bool IsKeyword()
{
    bool is_keyword = (TokenType.Keyword == type);
    return is_keyword;
}
// Reports whether this token is the keyword named by key.
//
// key: keyword to test for; letter case is ignored.
// Returns true only when the token's type is Keyword and its text equals
// key ignoring case; false for non-keyword tokens and for a null key.
//
// The comparison is ordinal (culture-independent): upper-casing with the
// current culture turns "insert" into a string that differs from "INSERT"
// under cultures such as tr-TR, so keywords would silently stop matching.
public bool IsKeywordEq(string key)
{
    if (!IsKeyword())
        return false;

    return key != null
        && String.Equals(key, name, StringComparison.OrdinalIgnoreCase);
}
// True when the token is double-quoted, [delimited] or unquoted text,
// i.e. one of the three token types treated as identifiers.
public bool IsIdentifier()
{
    if (type == TokenType.DoubleQuoted)
        return true;
    if (type == TokenType.Delimited)
        return true;
    return type == TokenType.Unquoted;
}
// True for both comment token types: the "--" form (Comment) and the
// "/* */" form (DelimitedComment).
public bool IsComment()
{
    switch (type)
    {
        case TokenType.Comment:
        case TokenType.DelimitedComment:
            return true;
        default:
            return false;
    }
}
// Renders the token back as SQL text: the token text (local `cool`),
// re-wrapped in the quoting or comment markers that match its type, with
// leading_space in front and trailing_space behind.
// NOTE(review): the declaration and initial value of `cool` are not visible
// in this excerpt - presumably it starts out as name; confirm.
65 public override string ToString()
// Single-quoted text gets its '...' quotes back.
68 if (type
== TokenType
.SingleQuoted
)
69 cool
= String
.Format("'{0}'", name
);
// Double-quoted text gets its "..." quotes back.
70 else if (type
== TokenType
.DoubleQuoted
)
71 cool
= String
.Format("\"{0}\"", name
);
// Delimited text gets its [...] brackets back.
72 else if (type
== TokenType
.Delimited
)
73 cool
= String
.Format("[{0}]", name
);
// Block comments get their /* ... */ markers back.
74 else if (type
== TokenType
.DelimitedComment
)
75 cool
= String
.Format("/*{0}*/", name
);
// Line comments get their leading -- back.
76 else if (type
== TokenType
.Comment
)
77 cool
= String
.Format("--{0}", name
);
// Disabled: keywords are not upper-cased on output.
78 //if (type == TokenType.Keyword)
79 // return name.ToUpper();
// Surround the (possibly re-wrapped) text with its recorded whitespace.
80 return leading_space
+ cool
+ trailing_space
;
// Implicit conversion from a token to string.
// NOTE(review): the operator body is not visible in this excerpt, so which
// text it yields (name vs. ToString()) cannot be stated here - confirm.
83 public static implicit operator string(VxSqlToken t
)
89 public class VxSqlTokenizer
// Table of words treated as SQL keywords. decipher_unquoted_state()
// upper-cases the current token text and compares it with each entry using
// ==, so every entry must be written in upper case.
// NOTE(review): presumably the T-SQL reserved-word list laid out in three
// columns; the end of the initializer is not visible in this excerpt.
91 private static string[] sqlkeywords
= {
92 "ADD", "EXCEPT", "PERCENT",
93 "ALL", "EXEC", "PLAN",
94 "ALTER", "EXECUTE", "PRECISION",
95 "AND", "EXISTS", "PRIMARY",
96 "ANY", "EXIT", "PRINT",
97 "AS", "FETCH", "PROC",
98 "ASC", "FILE", "PROCEDURE",
99 "AUTHORIZATION", "FILLFACTOR", "PUBLIC",
100 "BACKUP", "FOR", "RAISERROR",
101 "BEGIN", "FOREIGN", "READ",
102 "BETWEEN", "FREETEXT", "READTEXT",
103 "BREAK", "FREETEXTTABLE", "RECONFIGURE",
104 "BROWSE", "FROM", "REFERENCES",
105 "BULK", "FULL", "REPLICATION",
106 "BY", "FUNCTION", "RESTORE",
107 "CASCADE", "GOTO", "RESTRICT",
108 "CASE", "GRANT", "RETURN",
109 "CHECK", "GROUP", "REVOKE",
110 "CHECKPOINT", "HAVING", "RIGHT",
111 "CLOSE", "HOLDLOCK", "ROLLBACK",
112 "CLUSTERED", "IDENTITY", "ROWCOUNT",
113 "COALESCE", "IDENTITY_INSERT", "ROWGUIDCOL",
114 "COLLATE", "IDENTITYCOL", "RULE",
115 "COLUMN", "IF", "SAVE",
116 "COMMIT", "IN", "SCHEMA",
117 "COMPUTE", "INDEX", "SELECT",
118 "CONSTRAINT", "INNER", "SESSION_USER",
119 "CONTAINS", "INSERT", "SET",
120 "CONTAINSTABLE", "INTERSECT", "SETUSER",
121 "CONTINUE", "INTO", "SHUTDOWN",
122 "CONVERT", "IS", "SOME",
123 "CREATE", "JOIN", "STATISTICS",
124 "CROSS", "KEY", "SYSTEM_USER",
125 "CURRENT", "KILL", "TABLE",
126 "CURRENT_DATE", "LEFT", "TEXTSIZE",
127 "CURRENT_TIME", "LIKE", "THEN",
128 "CURRENT_TIMESTAMP", "LINENO", "TO",
129 "CURRENT_USER", "LOAD", "TOP",
130 "CURSOR", "NATIONAL", "TRAN",
131 "DATABASE", "NOCHECK", "TRANSACTION",
132 "DBCC", "NONCLUSTERED", "TRIGGER",
133 "DEALLOCATE", "NOT", "TRUNCATE",
134 "DECLARE", "NULL", "TSEQUAL",
135 "DEFAULT", "NULLIF", "UNION",
136 "DELETE", "OF", "UNIQUE",
137 "DENY", "OFF", "UPDATE",
138 "DESC", "OFFSETS", "UPDATETEXT",
140 "DISTINCT", "OPEN", "USER",
141 "DISTRIBUTED", "OPENDATASOURCE", "VALUES",
142 "DOUBLE", "OPENQUERY", "VARYING",
143 "DROP", "OPENROWSET", "VIEW",
144 "DUMMY", "OPENXML", "WAITFOR",
145 "DUMP", "OPTION", "WHEN",
146 "ELSE", "OR", "WHERE",
147 "END", "ORDER", "WHILE",
148 "ERRLVL", "OUTER", "WITH",
149 "ESCAPE", "OVER", "WRITETEXT",
// Token list; tokenize() replaces it with a fresh list on each call.
152 private List
<VxSqlToken
> tokens
;
// Most recently constructed token (assigned by save_and_reset_state()).
153 private VxSqlToken last
;
// Current state of the tokenizer's state machine; TokenType.None means no
// token is in progress.
157 VxSqlToken
.TokenType curstate
;
// Returns the state machine to TokenType.None.
// NOTE(review): original lines before the assignment are not visible in
// this excerpt; presumably they also clear the text accumulators - confirm.
159 private void reset_state()
163 curstate
= VxSqlToken
.TokenType
.None
;
// Builds a token from the current state (type curstate, text cur,
// whitespace curspace) and remembers it in `last`.
// NOTE(review): the rest of the body is not visible in this excerpt;
// judging by the name it presumably appends the token to `tokens` and calls
// reset_state() - confirm.
166 private void save_and_reset_state()
168 last
= new VxSqlToken(curstate
, cur
, curspace
);
// Maps a character to the token type it forms entirely on its own, or
// TokenType.None (the final return) when it forms none. tokenize() calls
// this in the None state and, for a non-None result, saves a token of that
// type immediately.
// NOTE(review): the conditions that select each return are not visible in
// this excerpt, so the exact character-to-type mapping is not documented here.
173 private VxSqlToken
.TokenType
get_singletoken_state(char c
)
178 return VxSqlToken
.TokenType
.LParen
;
180 return VxSqlToken
.TokenType
.RParen
;
185 return VxSqlToken
.TokenType
.Bitop
;
189 return VxSqlToken
.TokenType
.Multop
;
192 return VxSqlToken
.TokenType
.Addop
;
196 return VxSqlToken
.TokenType
.Relop
;
198 return VxSqlToken
.TokenType
.Not
;
200 return VxSqlToken
.TokenType
.Comma
;
202 return VxSqlToken
.TokenType
.Semicolon
;
204 return VxSqlToken
.TokenType
.Period
;
// Fallback: the character is not a single-character token.
206 return VxSqlToken
.TokenType
.None
;
// Promotes the token in progress to TokenType.Keyword when its upper-cased
// text (cur) equals an entry of sqlkeywords; otherwise curstate is left
// untouched by the visible code.
// NOTE(review): ToUpper() uses the current culture; under e.g. tr-TR
// "insert" does not upper-case to "INSERT", so keywords containing 'i'
// would not be recognised - consider ToUpperInvariant().
// NOTE(review): the statements following the assignment inside the loop are
// not visible in this excerpt.
210 private void decipher_unquoted_state()
212 string curupper
= cur
.ToUpper();
// Linear scan of the keyword table.
213 foreach (string word
in sqlkeywords
)
214 if (curupper
== word
)
216 curstate
= VxSqlToken
.TokenType
.Keyword
;
// Splits the SQL text q into tokens using a character-driven state machine
// keyed on curstate. The token list is recreated on every call.
// NOTE(review): many original lines (declaration of c, text accumulation,
// several conditions, braces) are not visible in this excerpt; the comments
// below describe only what the visible lines establish.
221 public void tokenize(string q
)
// Start from an empty token list.
224 tokens
= new List
<VxSqlToken
>();
// Walk the input one character at a time.
228 for (int i
= 0; i
< q
.Length
; ++i
)
// One-character lookahead; '\0' stands in for "end of input".
231 char peek
= i
< q
.Length
- 1 ? q
[i
+ 1] : '\0';
// --- No token in progress: decide what the current character starts. ---
234 case VxSqlToken
.TokenType
.None
:
235 VxSqlToken
.TokenType singletoken_state
= get_singletoken_state(c
);
// A letter, '_', '@' or '#' starts an unquoted word.
236 if (isalpha(c
) || c
== '_' || c
== '@' || c
== '#')
238 curstate
= VxSqlToken
.TokenType
.Unquoted
;
// One-character word: classify and emit it right away.
240 if (!isidentifierchar(peek
))
242 decipher_unquoted_state();
243 save_and_reset_state();
// NOTE(review): the conditions guarding the next three state changes are
// not visible in this excerpt.
247 curstate
= VxSqlToken
.TokenType
.SingleQuoted
;
249 curstate
= VxSqlToken
.TokenType
.DoubleQuoted
;
251 curstate
= VxSqlToken
.TokenType
.Delimited
;
// "--" opens a line comment.
252 else if (c
== '-' && peek
== '-')
255 curstate
= VxSqlToken
.TokenType
.Comment
;
// "/*" opens a block comment.
257 else if (c
== '/' && peek
== '*')
260 curstate
= VxSqlToken
.TokenType
.DelimitedComment
;
// NOTE(review): the condition guarding this Numeric transition is not
// visible in this excerpt.
264 curstate
= VxSqlToken
.TokenType
.Numeric
;
// The number ends here when the next character cannot continue it.
266 if (!isnumericchar(peek
))
267 save_and_reset_state();
// A '.' directly followed by a digit also starts a number.
269 else if (c
== '.' && isdigit(peek
))
271 curstate
= VxSqlToken
.TokenType
.Numeric
;
// Characters that are complete tokens by themselves are emitted at once.
274 else if (singletoken_state
!= VxSqlToken
.TokenType
.None
)
277 curstate
= singletoken_state
;
278 save_and_reset_state();
// Anything else that is not whitespace becomes an ERROR_UNKNOWN token.
280 else if (!isspace(c
))
283 curstate
= VxSqlToken
.TokenType
.ERROR_UNKNOWN
;
284 save_and_reset_state();
// --- Inside an unquoted word: it ends when the lookahead cannot continue it. ---
289 case VxSqlToken
.TokenType
.Unquoted
:
291 if (!isidentifierchar(peek
))
293 decipher_unquoted_state();
294 save_and_reset_state();
// --- Inside single-quoted text (terminating condition not visible). ---
297 case VxSqlToken
.TokenType
.SingleQuoted
:
306 save_and_reset_state();
// --- Inside double-quoted text (terminating condition not visible). ---
311 case VxSqlToken
.TokenType
.DoubleQuoted
:
320 save_and_reset_state();
// --- Inside delimited text (terminating condition not visible). ---
325 case VxSqlToken
.TokenType
.Delimited
:
327 save_and_reset_state();
// --- Inside a line comment (terminating condition not visible). ---
331 case VxSqlToken
.TokenType
.Comment
:
332 // NOTE: In T-Sql, a 'GO' command within a comment generates an
333 // error, but our tokenizer doesn't need to care about this.
336 save_and_reset_state();
// --- Inside a block comment: "*/" closes it. ---
342 case VxSqlToken
.TokenType
.DelimitedComment
:
343 // NOTE: In T-Sql, a 'GO' command within a comment generates an
344 // error, but our tokenizer doesn't need to care about this.
345 if (c
== '*' && peek
== '/')
348 save_and_reset_state();
// --- Inside a number. ---
353 case VxSqlToken
.TokenType
.Numeric
:
// An 'E'/'e' switches to scientific notation.
354 if (c
== 'E' || c
== 'e')
356 curstate
= VxSqlToken
.TokenType
.Scientific
;
// An exponent sign may follow (handling not visible in this excerpt).
358 if (peek
== '-' || peek
== '+')
// No digit after the 'e': the token is emitted as a plain Numeric.
363 else if (!isdigit(peek
))
365 curstate
= VxSqlToken
.TokenType
.Numeric
;
366 save_and_reset_state();
// The number ends when the lookahead cannot continue it.
372 if (!isnumericchar(peek
))
373 save_and_reset_state();
// --- Inside the exponent: the finished token is emitted as Numeric. ---
376 case VxSqlToken
.TokenType
.Scientific
:
380 curstate
= VxSqlToken
.TokenType
.Numeric
;
381 save_and_reset_state();
// End of input: flush a token that is still in progress.
389 if (curstate
!= VxSqlToken
.TokenType
.None
)
390 save_and_reset_state();
// Leftover whitespace is attached to the last token as trailing_space...
394 last
.trailing_space
= curspace
;
// ...or carried by an empty None token.
// NOTE(review): the condition choosing between these two assignments is not
// visible in this excerpt - presumably whether any token exists; confirm.
397 last
= new VxSqlToken(VxSqlToken
.TokenType
.None
, "", curspace
);
// Diagnostic dump of the query and its tokens.
// NOTE(review): could not tell from this excerpt whether these lines are
// active code or sit inside a disabled region - confirm.
403 Console.WriteLine("GOT A REQUEST FROM VERSAPLEX:");
404 Console.WriteLine("Original Query: {0}", q);
405 Console.WriteLine("Broken down:");
406 foreach (VxSqlToken t in tokens)
408 Console.WriteLine("Token: {0}: {1}", t.type.ToString(), t.name);
// Convenience overload: expands fmt with args through wv.fmt and tokenizes
// the resulting query text.
public void tokenize(string fmt, params object[] args)
{
    string query = wv.fmt(fmt, args);
    tokenize(query);
}
// Creates a tokenizer for the given query text.
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably it tokenizes query immediately - confirm.
418 public VxSqlTokenizer(string query
)
// Creates a tokenizer without input; Maintenance.Main uses it this way and
// then calls tokenize(). (Constructor body not visible in this excerpt.)
423 public VxSqlTokenizer()
// Accessor for the token list.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns the `tokens` field built by the last tokenize() call - confirm.
427 public List
<VxSqlToken
> gettokens()
// True when c is a letter according to char.IsLetter.
private static bool isalpha(char c)
{
    bool letter = char.IsLetter(c);
    return letter;
}
// True when c may appear inside an unquoted word: letters, digits, '@',
// '#' and '_'.
// NOTE(review): the expression ends in a dangling "||" - its final
// alternative is not visible in this excerpt, so at least one more
// character is accepted than listed here.
437 private static bool isidentifierchar(char c
)
439 return char.IsLetterOrDigit(c
) || c
== '@' || c
== '#' || c
== '_' ||
// True when c can continue a numeric literal: a digit, the decimal point,
// or the exponent marker 'e' / 'E'.
private static bool isnumericchar(char c)
{
    switch (c)
    {
        case '.':
        case 'e':
        case 'E':
            return true;
        default:
            return isdigit(c);
    }
}
// True when c is whitespace according to char.IsWhiteSpace.
private static bool isspace(char c)
{
    bool blank = char.IsWhiteSpace(c);
    return blank;
}
// True when c is a decimal digit according to char.IsDigit.
private static bool isdigit(char c)
{
    bool digit = char.IsDigit(c);
    return digit;
}
// Small console driver: tokenizes one sample CREATE PROCEDURE statement and
// prints the type and rendered text of every resulting token.
// NOTE(review): could not tell from this excerpt whether this class is
// active code or sits inside a disabled region - confirm.
460 public class Maintenance
462 public static void Main(string[] args)
464 VxSqlTokenizer me = new VxSqlTokenizer();
465 me.tokenize("create procedure Func1 as select 'Hello, world, this is Func1!'\n");
// Dump each token; {1} goes through VxSqlToken's string formatting.
467 foreach (VxSqlToken t in me.gettokens())
469 Console.WriteLine("VxSqlToken: {0}: {1}", t.type, t);