2 // Mono.MonoBASIC.Tokenizer.cs: The Tokenizer for the MonoBASIC compiler
4 // Author: A Rafael D Teixeira (rafaelteixeirabr@hotmail.com)
6 // Based on cs-tokenizer.cs by Miguel de Icaza (miguel@gnu.org)
8 // Licensed under the terms of the GNU GPL
10 // Copyright (C) 2001 A Rafael D Teixeira
13 namespace Mono
.MonoBASIC
17 using System
.Collections
;
19 using System
.Globalization
;
24 /// Tokenizer for MonoBASIC source code.
27 public class Tokenizer
: yyParser
.yyInput
35 public int current_token
= Token
.ERROR
;
36 bool handle_get_set
= false;
37 bool cant_have_a_type_character
= false;
39 public int ExpandedTabsSize
= 4;
41 public string location
{
45 if (current_token
== Token
.ERROR
)
46 det
= "detail: " + error_details
;
50 return "Line: "+line
+" Col: "+col
+ "\n" +
51 "VirtLine: "+ref_line
+
52 " Token: "+current_token
+ " " + det
;
/// <summary>
///   Gets or sets the handle_get_set flag of this tokenizer.
///   Exposes the same backing field as PropertyParsing.
/// </summary>
public bool properties
{
	get { return handle_get_set; }
	set { handle_get_set = value; }
}
69 static Hashtable keywords
;
70 static NumberStyles styles
;
71 static NumberFormatInfo csharp_format_info
;
74 // Values for the associated token returned
77 int putback_char
= -1;
81 // Details about the error encountered by the tokenizer
92 public string Source
{
100 Location
.SetCurrentSource(file_name
);
104 public string EffectiveSource
{
110 Location
.SetCurrentSource(ref_name
);
120 public int EffectiveLine
{
/// <summary>
///   Builds the static keyword table that maps each keyword spelling
///   to its Token constant.
/// </summary>
/// <remarks>
///   Keys must be lower-case and must not contain whitespace: the
///   lookups in is_keyword and getKeyword lower-case the scanned
///   identifier and use it verbatim as the key, so a key with a
///   trailing blank can never be matched.
/// </remarks>
static void initTokens ()
{
	keywords = new Hashtable ();

	keywords.Add ("addhandler", Token.ADDHANDLER);
	keywords.Add ("addressof", Token.ADDRESSOF);
	keywords.Add ("alias", Token.ALIAS);
	keywords.Add ("and", Token.AND);
	keywords.Add ("andalso", Token.ANDALSO);
	keywords.Add ("ansi", Token.ANSI);
	keywords.Add ("as", Token.AS);
	keywords.Add ("assembly", Token.ASSEMBLY);
	keywords.Add ("auto", Token.AUTO);
	keywords.Add ("binary", Token.BINARY); // Not a VB.NET Keyword
	keywords.Add ("boolean", Token.BOOLEAN);
	keywords.Add ("byref", Token.BYREF);
	keywords.Add ("byte", Token.BYTE);
	keywords.Add ("byval", Token.BYVAL);
	keywords.Add ("call", Token.CALL);
	keywords.Add ("case", Token.CASE);
	keywords.Add ("catch", Token.CATCH);
	keywords.Add ("cbool", Token.CBOOL);
	keywords.Add ("cbyte", Token.CBYTE);
	keywords.Add ("cchar", Token.CCHAR);
	keywords.Add ("cdate", Token.CDATE);
	keywords.Add ("cdec", Token.CDEC);
	keywords.Add ("cdbl", Token.CDBL);
	keywords.Add ("char", Token.CHAR);
	keywords.Add ("cint", Token.CINT);
	keywords.Add ("class", Token.CLASS);
	keywords.Add ("clng", Token.CLNG);
	keywords.Add ("cobj", Token.COBJ);
	keywords.Add ("compare", Token.COMPARE); // Not a VB.NET Keyword
	keywords.Add ("const", Token.CONST);
	keywords.Add ("cshort", Token.CSHORT);
	keywords.Add ("csng", Token.CSNG);
	keywords.Add ("cstr", Token.CSTR);
	keywords.Add ("ctype", Token.CTYPE);
	keywords.Add ("date", Token.DATE);
	keywords.Add ("decimal", Token.DECIMAL);
	keywords.Add ("declare", Token.DECLARE);
	keywords.Add ("default", Token.DEFAULT);
	keywords.Add ("delegate", Token.DELEGATE);
	keywords.Add ("dim", Token.DIM);
	keywords.Add ("directcast", Token.DIRECTCAST);
	keywords.Add ("do", Token.DO);
	keywords.Add ("double", Token.DOUBLE);
	keywords.Add ("each", Token.EACH);
	keywords.Add ("else", Token.ELSE);
	keywords.Add ("elseif", Token.ELSEIF);
	keywords.Add ("end", Token.END);
	keywords.Add ("endif", Token.ENDIF); // An unused VB.NET keyword
	keywords.Add ("enum", Token.ENUM);
	keywords.Add ("erase", Token.ERASE);
	keywords.Add ("error", Token.ERROR);
	keywords.Add ("event", Token.EVENT);
	keywords.Add ("exit", Token.EXIT);
	keywords.Add ("explicit", Token.EXPLICIT); // Not a VB.NET keyword
	keywords.Add ("false", Token.FALSE);
	keywords.Add ("finally", Token.FINALLY);
	keywords.Add ("for", Token.FOR);
	keywords.Add ("friend", Token.FRIEND);
	keywords.Add ("function", Token.FUNCTION);
	keywords.Add ("get", Token.GET);
	keywords.Add ("gettype", Token.GETTYPE);
	keywords.Add ("gosub", Token.GOSUB); // An unused VB.NET keyword
	keywords.Add ("goto", Token.GOTO);
	keywords.Add ("handles", Token.HANDLES);
	keywords.Add ("if", Token.IF);
	keywords.Add ("implements", Token.IMPLEMENTS);
	keywords.Add ("imports", Token.IMPORTS);
	keywords.Add ("in", Token.IN);
	keywords.Add ("inherits", Token.INHERITS);
	keywords.Add ("integer", Token.INTEGER);
	keywords.Add ("interface", Token.INTERFACE);
	keywords.Add ("is", Token.IS);
	// The next three keys used to carry a trailing blank ("let ", "lib ",
	// "like ") and therefore could never match a scanned identifier.
	keywords.Add ("let", Token.LET); // An unused VB.NET keyword
	keywords.Add ("lib", Token.LIB);
	keywords.Add ("like", Token.LIKE);
	keywords.Add ("long", Token.LONG);
	keywords.Add ("loop", Token.LOOP);
	keywords.Add ("me", Token.ME);
	keywords.Add ("mod", Token.MOD);
	keywords.Add ("module", Token.MODULE);
	keywords.Add ("mustinherit", Token.MUSTINHERIT);
	keywords.Add ("mustoverride", Token.MUSTOVERRIDE);
	keywords.Add ("mybase", Token.MYBASE);
	keywords.Add ("myclass", Token.MYCLASS);
	keywords.Add ("namespace", Token.NAMESPACE);
	keywords.Add ("new", Token.NEW);
	keywords.Add ("next", Token.NEXT);
	keywords.Add ("not", Token.NOT);
	keywords.Add ("nothing", Token.NOTHING);
	keywords.Add ("notinheritable", Token.NOTINHERITABLE);
	keywords.Add ("notoverridable", Token.NOTOVERRIDABLE);
	keywords.Add ("object", Token.OBJECT);
	keywords.Add ("off", Token.OFF); // Not a VB.NET Keyword
	keywords.Add ("on", Token.ON);
	keywords.Add ("option", Token.OPTION);
	keywords.Add ("optional", Token.OPTIONAL);
	keywords.Add ("or", Token.OR);
	keywords.Add ("orelse", Token.ORELSE);
	keywords.Add ("overloads", Token.OVERLOADS);
	keywords.Add ("overridable", Token.OVERRIDABLE);
	keywords.Add ("overrides", Token.OVERRIDES);
	keywords.Add ("paramarray", Token.PARAM_ARRAY);
	keywords.Add ("preserve", Token.PRESERVE);
	keywords.Add ("private", Token.PRIVATE);
	keywords.Add ("property", Token.PROPERTY);
	keywords.Add ("protected", Token.PROTECTED);
	keywords.Add ("public", Token.PUBLIC);
	keywords.Add ("raiseevent", Token.RAISEEVENT);
	keywords.Add ("readonly", Token.READONLY);
	keywords.Add ("redim", Token.REDIM);
	keywords.Add ("rem", Token.REM);
	keywords.Add ("removehandler", Token.REMOVEHANDLER);
	keywords.Add ("resume", Token.RESUME);
	keywords.Add ("return", Token.RETURN);
	keywords.Add ("select", Token.SELECT);
	keywords.Add ("set", Token.SET);
	keywords.Add ("shadows", Token.SHADOWS);
	keywords.Add ("shared", Token.SHARED);
	keywords.Add ("short", Token.SHORT);
	keywords.Add ("single", Token.SINGLE);
	keywords.Add ("sizeof", Token.SIZEOF); // Not a VB.NET Keyword
	keywords.Add ("static", Token.STATIC);
	keywords.Add ("step", Token.STEP);
	keywords.Add ("stop", Token.STOP);
	keywords.Add ("strict", Token.STRICT); // Not a VB.NET Keyword
	keywords.Add ("string", Token.STRING);
	keywords.Add ("structure", Token.STRUCTURE);
	keywords.Add ("sub", Token.SUB);
	keywords.Add ("synclock", Token.SYNCLOCK);
	keywords.Add ("text", Token.TEXT); // Not a VB.NET Keyword
	keywords.Add ("then", Token.THEN);
	keywords.Add ("throw", Token.THROW);
	keywords.Add ("to", Token.TO);
	keywords.Add ("true", Token.TRUE);
	keywords.Add ("try", Token.TRY);
	keywords.Add ("typeof", Token.TYPEOF);
	keywords.Add ("unicode", Token.UNICODE);
	keywords.Add ("until", Token.UNTIL);
	keywords.Add ("variant", Token.VARIANT); // An unused VB.NET keyword
	keywords.Add ("wend", Token.WEND); // An unused VB.NET keyword
	keywords.Add ("when", Token.WHEN);
	keywords.Add ("while", Token.WHILE);
	keywords.Add ("with", Token.WITH);
	keywords.Add ("withevents", Token.WITHEVENTS);
	keywords.Add ("writeonly", Token.WRITEONLY);
	keywords.Add ("xor", Token.XOR);

	// "yield" is only registered when the extended syntax is enabled.
	if (Parser.UseExtendedSyntax){
		keywords.Add ("yield", Token.YIELD);
	}
}
295 csharp_format_info
= new NumberFormatInfo ();
296 csharp_format_info
.CurrencyDecimalSeparator
= ".";
297 styles
= NumberStyles
.AllowExponent
| NumberStyles
.AllowDecimalPoint
;
300 public Tokenizer (System
.IO
.TextReader input
, string fname
, ArrayList defines
)
306 // putback an EOL at the beginning of a stream. This is a convenience that
307 // allows pre-processor directives to be added to the beginning of a vb file.
/// <summary>
///   Tells whether <paramref name="name"/> is a keyword, ignoring case.
/// </summary>
/// <param name="name">Identifier text as scanned from the source.</param>
/// <returns>
///   true when the lower-cased name is present in the keyword table;
///   false otherwise, and always false for "get"/"set" while
///   handle_get_set is off.
/// </returns>
bool is_keyword (string name)
{
	name = name.ToLower();

	// The name has just been lower-cased, so it must be compared with the
	// lower-case spellings. The previous comparison against "GET"/"SET"
	// could never be true, which left the handle_get_set switch without
	// any effect.
	// NOTE(review): "get"/"set" now stop being keywords unless the
	// parser turns PropertyParsing on -- confirm that it does.
	if ((name == "get" || name == "set") && !handle_get_set)
		return false;

	return keywords.Contains(name);
}
/// <summary>
///   Looks up the token number stored in the keyword table for
///   <paramref name="name"/>, ignoring case.
/// </summary>
/// <param name="name">Keyword text; lower-cased before the lookup.</param>
/// <returns>The table entry for the lower-cased name, cast to int.</returns>
int getKeyword (string name)
{
	string key = name.ToLower();
	object entry = keywords [key];

	// No missing-key check here: the entry is cast to int as-is.
	return (int) entry;
}
/// <summary>
///   Builds a new Location from the current ref_line and col values.
/// </summary>
public Location Location
{
	get { return new Location (ref_line, col); }
}
/// <summary>
///   Gets or sets the handle_get_set flag of this tokenizer.
/// </summary>
public bool PropertyParsing
{
	get { return handle_get_set; }
	set { handle_get_set = value; }
}
/// <summary>
///   Tells whether <paramref name="c"/> may begin an identifier:
///   an underscore or any character Char.IsLetter accepts.
/// </summary>
bool is_identifier_start_character (char c)
{
	if (c == '_')
		return true;

	return Char.IsLetter (c);
}
/// <summary>
///   Tells whether <paramref name="c"/> may appear inside an identifier:
///   an underscore, a letter or a decimal digit.
/// </summary>
bool is_identifier_part_character (char c)
{
	if (c == '_')
		return true;

	// Char.IsLetterOrDigit accepts exactly the union of Char.IsLetter
	// and Char.IsDigit.
	return Char.IsLetterOrDigit (c);
}
353 int is_punct (char c
, ref bool doread
)
360 error_details
= c
.ToString();
366 return Token
.OPEN_BRACKET
;
368 return Token
.CLOSE_BRACKET
;
370 return Token
.OPEN_BRACE
;
372 return Token
.CLOSE_BRACE
;
374 return Token
.OPEN_PARENS
;
376 return Token
.CLOSE_PARENS
;
382 if (is_identifier_start_character((char)d
) || cant_have_a_type_character
)
383 return Token
.EXCLAMATION
;
384 return Token
.SINGLETYPECHAR
;
386 if (cant_have_a_type_character
)
388 return Token
.DOLAR_SIGN
;
390 if (cant_have_a_type_character
)
392 return Token
.AT_SIGN
;
394 if (cant_have_a_type_character
)
396 return Token
.PERCENT
;
400 if (cant_have_a_type_character
)
401 return ExtractDateTimeLiteral();
403 return Token
.NUMBER_SIGN
;
411 if (!cant_have_a_type_character
)
412 return Token
.LONGTYPECHAR
;
413 t
= handle_integer_literal_in_other_bases(d
);
414 if (t
== Token
.NONE
) {
445 return Token
.OP_IDIV
;
465 return Token
.OP_SHIFT_LEFT
;
478 return Token
.OP_SHIFT_RIGHT
;
486 return Token
.ATTR_ASSIGN
;
494 bool decimal_digits (int c
)
497 bool seen_digits
= false;
500 number
.Append ((char) c
);
502 while ((d
= peekChar ()) != -1){
503 if (Char
.IsDigit ((char)d
)){
504 number
.Append ((char) d
);
514 int real_type_suffix (int c
)
520 t
= Token
.LITERAL_SINGLE
;
523 t
= Token
.LITERAL_DOUBLE
;
526 t
= Token
.LITERAL_DECIMAL
;
535 int integer_type_suffix (int c
)
543 t
= Token
.LITERAL_INTEGER
; // SHORT ?
544 val
= ((IConvertible
)val
).ToInt16(null);
547 t
= Token
.LITERAL_INTEGER
;
548 val
= ((IConvertible
)val
).ToInt32(null);
551 t
= Token
.LITERAL_INTEGER
; // LONG ?
552 val
= ((IConvertible
)val
).ToInt64(null);
555 if ((long)val
<= System
.Int32
.MaxValue
&&
556 (long)val
>= System
.Int32
.MinValue
) {
557 val
= ((IConvertible
)val
).ToInt32(null);
558 return Token
.LITERAL_INTEGER
;
560 val
= ((IConvertible
)val
).ToInt64(null);
561 return Token
.LITERAL_INTEGER
; // LONG ?
566 } catch (Exception e
) {
572 int adjust_real (int t
)
574 string s
= number
.ToString ();
577 case Token
.LITERAL_DECIMAL
:
578 val
= new System
.Decimal ();
579 val
= System
.Decimal
.Parse (
580 s
, styles
, csharp_format_info
);
582 case Token
.LITERAL_DOUBLE
:
583 val
= new System
.Double ();
584 val
= System
.Double
.Parse (
585 s
, styles
, csharp_format_info
);
587 case Token
.LITERAL_SINGLE
:
588 val
= new System
.Double ();
589 val
= (float) System
.Double
.Parse (
590 s
, styles
, csharp_format_info
);
594 val
= new System
.Double ();
595 val
= System
.Double
.Parse (
596 s
, styles
, csharp_format_info
);
597 t
= Token
.LITERAL_DOUBLE
;
605 StringBuilder hexNumber
= new StringBuilder ();
609 while ((d
= peekChar ()) != -1){
610 char e
= Char
.ToUpper ((char) d
);
612 if (Char
.IsDigit (e
) || (e
>= 'A' && e
<= 'F')){
613 hexNumber
.Append (e
);
618 return System
.Int64
.Parse (hexNumber
.ToString(), NumberStyles
.HexNumber
);
623 long valueToReturn
= 0;
627 while ((d
= peekChar ()) != -1){
629 if (Char
.IsDigit (e
) && (e
< '8')){
631 valueToReturn
+= (d
- (int)'0');
637 return valueToReturn
;
640 int handle_integer_literal_in_other_bases(int peek
)
642 if (peek
== 'h' || peek
== 'H'){
645 return integer_type_suffix (peekChar ());
648 if (peek
== 'o' || peek
== 'O'){
650 val
= octal_digits ();
651 return integer_type_suffix (peekChar ());
658 // Invoked if we know we have .digits or digits
660 int is_number (int c
)
662 bool is_real
= false;
663 number
= new StringBuilder ();
668 if (Char
.IsDigit ((char)c
)){
674 // We need to handle the case of
675 // "1.1" vs "1.ToString()" (LITERAL_SINGLE vs NUMBER DOT IDENTIFIER)
678 if (decimal_digits (getChar())){
684 val
= System
.Int64
.Parse(number
.ToString());
685 return integer_type_suffix('.');
689 if (c
== 'e' || c
== 'E'){
696 number
.Append ((char) c
);
699 } else if (c
== '-'){
700 number
.Append ((char) c
);
708 type
= real_type_suffix (c
);
709 if (type
== Token
.NONE
&& !is_real
){
710 val
= System
.Int64
.Parse(number
.ToString());
711 return integer_type_suffix(c
);
714 return adjust_real (type
);
719 if (putback_char
!= -1){
720 int x
= putback_char
;
725 return reader
.Read ();
730 if (putback_char
!= -1)
732 return reader
.Peek ();
737 if (putback_char
!= -1)
738 throw new Exception ("This should not happen putback on putback");
/// <summary>
///   Reports whether the current token is anything other than
///   Token.EOF. Does not change any tokenizer state.
/// </summary>
/// <returns>false when current_token is Token.EOF; true otherwise.</returns>
public bool advance ()
{
	if (current_token == Token.EOF)
		return false;

	return true;
}
747 public Object Value
{
753 public Object
value ()
758 private bool IsEOL(int currentChar
)
762 if (currentChar
== 0x0D) {
763 if (peekChar() == 0x0A) // if it is a CR-LF pair consume LF also
769 retVal
= (currentChar
== -1 || currentChar
== 0x0A || currentChar
== 0x2028 || currentChar
== 0x2029);
779 private int DropComments()
782 while (!IsEOL(d
= getChar ()))
790 int lastToken
= current_token
;
793 current_token
= xtoken ();
794 if (current_token
== 0)
796 if (current_token
== Token
.REM
)
797 current_token
= DropComments();
798 } while (lastToken
== Token
.EOL
&& current_token
== Token
.EOL
);
800 return current_token
;
803 private string GetIdentifier()
806 if (is_identifier_start_character ((char) c
))
807 return GetIdentifier(c
);
812 private string GetIdentifier(int c
)
814 StringBuilder id
= new StringBuilder ();
816 id
.Append ((char) c
);
818 while ((c
= peekChar ()) != -1)
820 if (is_identifier_part_character ((char) c
))
822 id
.Append ((char)getChar ());
829 cant_have_a_type_character
= false;
831 return id
.ToString();
/// <summary>
///   Tells whether <paramref name="currentChar"/> is one of the three
///   accepted double-quote characters.
/// </summary>
/// <param name="currentChar">Character code to test.</param>
private bool is_doublequote(int currentChar)
{
	switch (currentChar) {
	case '"':       // plain ASCII double quote
	case 0x201C:    // unicode left double-quote character
	case 0x201D:    // unicode right double-quote character
		return true;
	default:
		return false;
	}
}
/// <summary>
///   Tells whether <paramref name="c"/> is one of the blank characters
///   this check accepts: space, tab, vertical tab, carriage return or
///   the no-break space 0xA0. A line feed is not among them.
/// </summary>
/// <param name="c">Character code to test.</param>
private bool is_whitespace(int c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\v':
	case '\r':
	case 0xa0:
		return true;
	default:
		return false;
	}
}
846 private bool tokens_seen
= false;
848 private void nextLine()
850 cant_have_a_type_character
= true;
864 for (;(c
= getChar ()) != -1; col
++) {
866 // Handle line continuation character
870 if (!is_identifier_part_character((char)d
)) {
871 while ((c
= getChar ()) != -1 && !IsEOL(c
)) {}
877 if (is_whitespace(c
)) {
878 // expand tabs for location
880 col
= (((col
+ ExpandedTabsSize
) / ExpandedTabsSize
) * ExpandedTabsSize
) - 1;
881 cant_have_a_type_character
= true;
885 // Handle line comments.
892 if (current_token
== Token
.EOL
) // if last token was also EOL keep skipping
897 // Handle escaped identifiers
900 if ((val
= GetIdentifier()) == null)
902 if ((c
= getChar()) != ']')
905 return Token
.IDENTIFIER
;
908 // Handle unescaped identifiers
909 if (is_identifier_start_character ((char) c
))
912 if ((id
= GetIdentifier(c
)) == null)
916 if (is_keyword(id
) && (current_token
!= Token
.DOT
))
917 return getKeyword(id
);
918 return Token
.IDENTIFIER
;
921 // Treat string literals
922 if (is_doublequote(c
)) {
923 cant_have_a_type_character
= true;
924 return ExtractStringOrCharLiteral(c
);
927 // handle numeric literals
930 cant_have_a_type_character
= true;
932 if (Char
.IsDigit ((char) peekChar ()))
933 return is_number (c
);
937 if (Char
.IsDigit ((char) c
))
939 cant_have_a_type_character
= true;
941 return is_number (c
);
944 if ((t
= is_punct ((char)c
, ref doread
)) != Token
.ERROR
) {
945 cant_have_a_type_character
= true;
958 error_details
= ((char)c
).ToString ();
962 if (current_token
!= Token
.EOL
) // if last token wasn't EOL send it before EOF
968 private int ExtractDateTimeLiteral()
972 StringBuilder sb
= new StringBuilder();
973 for (;(c
= getChar ()) != -1; col
++)
976 val
= ParseDateLiteral(sb
);
977 return Token
.LITERAL_DATE
;
989 private int ExtractStringOrCharLiteral(int c
)
991 StringBuilder s
= new StringBuilder ();
995 while ((c
= getChar ()) != -1){
996 if (is_doublequote(c
)){
997 if (is_doublequote(peekChar()))
1000 //handle Char Literals
1001 if (peekChar() == 'C' || peekChar() == 'c') {
1003 if (s
.Length
== 1) {
1005 return Token
.LITERAL_CHARACTER
;
1007 val
= "Incorrect length for a character literal";
1011 val
= s
.ToString ();
1012 return Token
.LITERAL_STRING
;
1021 s
.Append ((char) c
);
1027 static IFormatProvider enUSculture
= new CultureInfo("en-US", true);
1029 private DateTime
ParseDateLiteral(StringBuilder
value)
1033 return DateTime
.Parse(value.ToString(),
1035 DateTimeStyles
.NoCurrentDateDefault
| DateTimeStyles
.AllowWhiteSpaces
);
1037 catch (FormatException ex
)
1039 //TODO: What is the correct error number and message?
1040 Report
.Error (1, Location
, string.Format("Invalid date literal '{0}'", value.ToString())
1041 + Environment
.NewLine
+ ex
.ToString());
1045 Report
.Error (1, Location
, "Error parsing date literal"); //TODO: What is the correct error number and message?
1047 return new DateTime();
1050 public void PositionCursorAtNextPreProcessorDirective()
1054 for(t
= token(); t
!= Token
.HASH
&& t
!= Token
.EOF
; t
= token());
1057 throw new ApplicationException("Unexpected EOF while looking for a pre-processor directive");
1059 if(t
== Token
.HASH
) {
1060 tokens_seen
= false;