2010-06-04 Jb Evain <jbevain@novell.com>
[mcs.git] / ilasm / scanner / ILTokenizer.cs
blob0d2730d6cee2dcddd638f9048fa19f0a92701190
1 // ILTokenizer.cs
2 // Author: Sergey Chaban (serge@wildwestsoftware.com)
4 using System;
5 using System.IO;
6 using System.Text;
7 using System.Collections;
8 using System.Globalization;
10 namespace Mono.ILASM {
/// <summary>
/// Signature of the handlers notified by <see cref="ILTokenizer.NewTokenEvent"/>.
/// </summary>
/// <param name="sender">The object raising the event.</param>
/// <param name="args">Event data carrying the token.</param>
public delegate void NewTokenEvent (object sender, NewTokenEventArgs args);
/// <summary>
/// Event data for <see cref="NewTokenEvent"/>: wraps the token handed to the constructor.
/// </summary>
public class NewTokenEventArgs : EventArgs {

    /// <summary>The token this event is about.</summary>
    public readonly ILToken Token;

    /// <summary>
    /// Creates the event data for the given token.
    /// </summary>
    /// <param name="token">Token stored in <see cref="Token"/>.</param>
    public NewTokenEventArgs (ILToken token)
    {
        this.Token = token;
    }
}
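/// <summary>
/// Tokenizer for IL assembly source: reads characters through an ILReader
/// and turns them into ILToken instances.
/// </summary>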
26 public class ILTokenizer : ITokenStream {
// Characters accepted in identifiers in addition to letters (and digits),
// see IsIdStartChar and IsIdChar.
private static readonly string idchars = "_$@?.`";

// Lookup tables, assigned once by the static constructor.
private static Hashtable keywords, directives;

// Character source and the helpers that build string and number tokens.
private ILReader reader;
private StringHelper strBuilder;
private NumberHelper numBuilder;

// Copy of the token most recently returned by GetNextToken.
private ILToken lastToken;

// While true, GetNextToken reads its input as hex bytes (see InByteArray).
private bool in_byte_array;

// Raised by OnNewToken for every token GetNextToken is about to return.
public event NewTokenEvent NewTokenEvent;
/// <summary>
/// Caches the keyword and directive tables exposed by ILTables.
/// </summary>
static ILTokenizer ()
{
    ILTokenizer.keywords = ILTables.Keywords;
    ILTokenizer.directives = ILTables.Directives;
}
/// <summary>
/// Creates a tokenizer over the given stream. The stream is wrapped in an
/// ILReader, the string and number helpers are attached to this instance,
/// and the last token starts out as a copy of ILToken.Invalid.
/// </summary>
/// <param name="reader">Stream reader supplying the characters to tokenize.</param>
public ILTokenizer (StreamReader reader)
{
    this.reader = new ILReader (reader);
    this.strBuilder = new StringHelper (this);
    this.numBuilder = new NumberHelper (this);
    this.lastToken = ILToken.Invalid.Clone () as ILToken;
}
/// <summary>
/// Gets the ILReader this tokenizer takes its characters from.
/// </summary>
public ILReader Reader {
    get { return this.reader; }
}
/// <summary>
/// Gets the location currently reported by the underlying reader.
/// </summary>
public Location Location {
    get { return this.reader.Location; }
}
/// <summary>
/// Gets or sets the flag that makes GetNextToken read its input as hex
/// bytes instead of ordinary tokens.
/// </summary>
public bool InByteArray {
    get {
        return this.in_byte_array;
    }
    set {
        this.in_byte_array = value;
    }
}
/// <summary>
/// Reads characters from the reader until one token has been recognized,
/// raises NewTokenEvent for it, records it as the last token and returns it.
/// </summary>
/// <remarks>
/// Comments are skipped. A character that starts no token is dropped and
/// scanning goes on with the next one. When the input runs out before a
/// token is recognized the result is a copy of ILToken.EOF; once the last
/// token equals ILToken.EOF every further call returns ILToken.EOF directly.
/// </remarks>
/// <returns>The recognized token.</returns>
/// <exception cref="ILTokenizingException">
/// In byte-array mode, when a character that is neither a hex digit,
/// whitespace nor ')' shows up where a hex byte is expected.
/// </exception>
public ILToken GetNextToken ()
{
    if (lastToken == ILToken.EOF)
        return ILToken.EOF;

    // Private copy: the branches below either fill it in or replace it
    // with one of the shared token instances.
    ILToken result = ILToken.EOF.Clone () as ILToken;
    int ch;
    int next;

    while ((ch = reader.Read ()) != -1) {

        // Comments
        if (ch == '/') {
            next = reader.Peek ();
            if (next == '/') {
                // Line comment: drop the second slash, then everything
                // up to and including the end of the line.
                reader.Read ();
                while (next != -1 && next != '\n')
                    next = reader.Read ();
                continue;
            }
            if (next == '*') {
                // Block comment: consume through the closing "*/", or
                // through the end of input when it is never closed.
                reader.Read ();
                next = reader.Read ();
                while (next != -1) {
                    if (next == '*' && reader.Peek () == '/') {
                        reader.Read ();
                        break;
                    }
                    next = reader.Read ();
                }
                continue;
            }
            // A lone slash falls through to the checks below.
        }

        // HEXBYTES are flagged by the parser otherwise it is
        // impossible to figure them out
        if (in_byte_array) {
            if (Char.IsWhiteSpace ((char) ch))
                continue;

            if (ch == ')') {
                result = ILToken.CloseParens;
                break;
            }

            if (!is_hex (ch))
                throw new ILTokenizingException (reader.Location, ((char) ch).ToString ());

            // One or two hex digits make up a byte.
            string hexDigits = ((char) ch).ToString ();
            if (is_hex (reader.Peek ())) {
                hexDigits += (char) reader.Read ();
            } else if (!Char.IsWhiteSpace ((char) reader.Peek ()) && reader.Peek () != ')') {
                throw new ILTokenizingException (reader.Location,
                        ((char) reader.Peek ()).ToString ());
            }
            result.token = Token.HEXBYTE;
            result.val = Byte.Parse (hexDigits, NumberStyles.HexNumber);

            // Swallow the whitespace that follows the byte.
            while (Char.IsWhiteSpace ((char) reader.Peek ()))
                reader.Read ();
            break;
        }

        // Ellipsis
        if (ch == '.' && reader.Peek () == '.') {
            reader.MarkLocation ();
            int secondDot = reader.Read ();
            if (reader.Peek () == '.') {
                result = ILToken.Ellipsis;
                reader.Read ();
                break;
            }
            // Only two dots: give the second one back.
            reader.Unread (secondDot);
            reader.RestoreLocation ();
        }

        // Leading-dot numbers, directives, or a plain dot
        if (ch == '.' || ch == '#') {
            next = reader.Peek ();
            if (ch == '.' && Char.IsDigit ((char) next)) {
                numBuilder.Start (ch);
                reader.Unread (ch);
                numBuilder.Build ();
                if (numBuilder.ResultToken != ILToken.Invalid) {
                    result.CopyFrom (numBuilder.ResultToken);
                    break;
                }
                // Not a valid number: fall through to the checks below.
            } else {
                // Anything that is not a known directive yields a Dot
                // token (this includes a '#' prefix).
                result = ILToken.Dot;
                if (strBuilder.Start (next) && strBuilder.TokenId == Token.ID) {
                    reader.MarkLocation ();
                    string body = strBuilder.Build ();
                    string directive = new string ((char) ch, 1) + body;
                    if (IsDirective (directive)) {
                        result = ILTables.Directives [directive] as ILToken;
                    } else {
                        // Give the identifier back for the next call.
                        reader.Unread (body.ToCharArray ());
                        reader.RestoreLocation ();
                    }
                }
                break;
            }
        }

        // Numbers && Hexbytes
        if (numBuilder.Start (ch)) {
            if (ch == '-' && !Char.IsDigit ((char) reader.Peek ())) {
                result = ILToken.Dash;
                break;
            }
            reader.Unread (ch);
            numBuilder.Build ();
            if (numBuilder.ResultToken != ILToken.Invalid) {
                result.CopyFrom (numBuilder.ResultToken);
                break;
            }
        }

        // Punctuation
        ILToken punctuation = ILToken.GetPunctuation (ch);
        if (punctuation != null) {
            result = punctuation;
            if (punctuation == ILToken.Colon && reader.Peek () == ':') {
                reader.Read ();
                result = ILToken.DoubleColon;
            }
            break;
        }

        // ID | QSTRING | SQSTRING | INSTR_* | KEYWORD
        if (strBuilder.Start (ch)) {
            reader.Unread (ch);
            string text = strBuilder.Build ();
            if (strBuilder.TokenId == Token.ID) {
                ILToken opcode;
                next = reader.Peek ();
                if (next == '.') {
                    reader.MarkLocation ();
                    reader.Read ();
                    next = reader.Peek ();
                    if (IsIdChar ((char) next)) {
                        string tail = BuildId ();
                        string fullName = String.Format ("{0}.{1}", text, tail);
                        opcode = InstrTable.GetToken (fullName);

                        if (opcode != null) {
                            result = opcode;
                            break;
                        }

                        // NOTE(review): TokenId was checked to be Token.ID a
                        // few lines up and nothing visible here changes it,
                        // so the first branch looks unreachable - confirm.
                        if (strBuilder.TokenId != Token.ID) {
                            reader.Unread (tail.ToCharArray ());
                            reader.Unread ('.');
                            reader.RestoreLocation ();
                            result.val = text;
                        } else {
                            result.token = Token.COMP_NAME;
                            result.val = fullName;
                        }
                        break;
                    } else if (Char.IsWhiteSpace ((char) next)) {
                        // Handle 'tail.' and 'unaligned.'
                        opcode = InstrTable.GetToken (text + ".");
                        if (opcode != null) {
                            result = opcode;
                            break;
                        }
                        // Let the parser handle the dot
                        reader.Unread ('.');
                    }
                    // NOTE(review): when the character after the dot is
                    // neither an id character nor whitespace, the dot read
                    // above is not pushed back - confirm this is intended.
                }

                opcode = InstrTable.GetToken (text);
                if (opcode != null) {
                    result = opcode;
                    break;
                }
                if (IsKeyword (text)) {
                    result = ILTables.Keywords [text] as ILToken;
                    break;
                }
            }

            result.token = strBuilder.TokenId;
            result.val = text;
            break;
        }
    }

    OnNewToken (result);
    lastToken.CopyFrom (result);
    return result;
}
/// <summary>
/// Gets the next token; every read of this property calls GetNextToken
/// and therefore advances the tokenizer.
/// </summary>
public ILToken NextToken {
    get { return GetNextToken (); }
}
/// <summary>
/// Gets the tokenizer's record of the token most recently returned by
/// GetNextToken.
/// </summary>
public ILToken LastToken {
    get { return this.lastToken; }
}
// Tells whether the character code is an ASCII hex digit
// (0-9, a-f or A-F). Any other value, including -1, yields false.
bool is_hex (int e)
{
    if (e >= '0' && e <= '9')
        return true;
    if (e >= 'a' && e <= 'f')
        return true;
    return e >= 'A' && e <= 'F';
}
// True when the character is a letter or one of the extra
// identifier characters listed in idchars.
private static bool IsIdStartChar (char ch)
{
    if (Char.IsLetter (ch))
        return true;
    return idchars.IndexOf (ch) >= 0;
}
// True when the character is a letter, a digit or one of the extra
// identifier characters listed in idchars.
private static bool IsIdChar (char ch)
{
    if (Char.IsLetterOrDigit (ch))
        return true;
    return idchars.IndexOf (ch) >= 0;
}
/// <summary>
/// Asks the instruction table whether the name is an instruction.
/// </summary>
/// <param name="name">Name to look up.</param>
/// <returns>Whatever InstrTable.IsInstr reports for the name.</returns>
public static bool IsOpcode (string name)
{
    bool isInstruction = InstrTable.IsInstr (name);
    return isInstruction;
}
/// <summary>
/// Tells whether the name is a known directive: it must start with '.'
/// or '#' and be present in the directive table.
/// </summary>
/// <param name="name">Candidate name, including its leading '.' or '#'.</param>
/// <returns>
/// True for a known directive; false otherwise, including for a null or
/// empty name.
/// </returns>
public static bool IsDirective (string name)
{
    // A null or empty name has no prefix character to inspect; indexing
    // it would throw instead of answering the question.
    if (String.IsNullOrEmpty (name))
        return false;

    char prefix = name [0];
    if (prefix != '.' && prefix != '#')
        return false;

    return directives.Contains (name);
}
// Reads the longest run of identifier characters and dots from the reader
// and returns it. The character that stops the run is pushed back. An id
// never ends on a dot: a trailing dot is pushed back as well and removed
// from the result, whether the run was stopped by another character or by
// the end of the input.
private string BuildId ()
{
    StringBuilder idsb = new StringBuilder ();
    int last = -1;
    int ch;

    while ((ch = reader.Read ()) != -1) {
        if (!IsIdChar ((char) ch) && ch != '.') {
            // Not part of the id: hand it back to the reader.
            reader.Unread (ch);
            break;
        }
        idsb.Append ((char) ch);
        last = ch;
    }

    // Never end an id on a DOT. Checked after the loop so the rule also
    // holds when the input ends right after the dot.
    if (last == '.') {
        reader.Unread (last);
        idsb.Length -= 1;
    }

    return idsb.ToString ();
}
/// <summary>
/// Tells whether the name is present in the keyword table.
/// </summary>
/// <param name="name">Name to look up.</param>
/// <returns>True when the keyword table contains the name.</returns>
public static bool IsKeyword (string name)
{
    bool found = keywords.Contains (name);
    return found;
}
// Raises NewTokenEvent for the token; does nothing when no handler is
// subscribed.
private void OnNewToken (ILToken token)
{
    if (NewTokenEvent == null)
        return;

    NewTokenEventArgs args = new NewTokenEventArgs (token);
    NewTokenEvent (this, args);
}